diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
index 12c93b9e..24ac3cbe 100644
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -5,14 +5,14 @@ body:
     id: system-info
     attributes:
       label: System Info
-      description: | 
+      description: |
         Please share your system info with us (`text-generation-launcher --env` if installed locally).
-        The full command line used that causes issues: 
+        The full command line used that causes issues:
         OS version:
         Rust version (if self-compiling, `cargo version`):
         Model being used (`curl 127.0.0.1:8080/info | jq`):
           If local model please explicit the kind of model and/or equivalents.
-        Hardware used (GPUs, how many, on which cloud) (`nvidia-smi`): 
+        Hardware used (GPUs, how many, on which cloud) (`nvidia-smi`):
         Deployment specificities (Kubernetes, EKS, AKS, any particular deployments):
         The current version being used:
 
@@ -52,11 +52,11 @@ body:
 
       placeholder: |
         Steps to reproduce the behavior:
-          
+
           1.
           2.
           3.
-          
+
 
   - type: textarea
     id: expected-behavior
diff --git a/.github/ISSUE_TEMPLATE/feature-request.yml b/.github/ISSUE_TEMPLATE/feature-request.yml
index 5abc1565..f1a9135c 100644
--- a/.github/ISSUE_TEMPLATE/feature-request.yml
+++ b/.github/ISSUE_TEMPLATE/feature-request.yml
@@ -19,7 +19,7 @@ body:
       label: Motivation
       description: |
         Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.
-        
+
 
   - type: textarea
     id: contribution
diff --git a/.github/workflows/autodocs.yml b/.github/workflows/autodocs.yml
index a981c09c..7c5c6eca 100644
--- a/.github/workflows/autodocs.yml
+++ b/.github/workflows/autodocs.yml
@@ -6,15 +6,15 @@ on:
 jobs:
   update_docs:
     runs-on: ubuntu-latest
-    
+
     steps:
     - name: Checkout code
       uses: actions/checkout@v2
-    
+
     - name: Install Launcher
       id: install-launcher
       run: cargo install --git https://github.com/${{ github.repository }} --branch ${{ github.head_ref }} text-generation-launcher
-    
+
     - name: Check launcher Docs are up-to-date
       run: |
         echo text-generation-launcher --help
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 395a0b6a..066ea889 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -146,11 +146,50 @@ jobs:
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
           cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache,mode=min
 
+  integration-tests: # runs the pytest integration suite against the image tagged with this commit's short SHA
+    concurrency: # job id is spelled out: github.job is only set while steps execute, so it is empty here
+      group: ${{ github.workflow }}-integration-tests-${{ github.head_ref || github.run_id }}
+      cancel-in-progress: true
+    needs:
+      - start-runner
+      - build-and-push-image # Wait for the docker image to be built
+    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
+    env:
+      DOCKER_VOLUME: /cache
+    steps:
+      - uses: actions/checkout@v2
+      - name: Inject slug/short variables # exposes GITHUB_SHA_SHORT, read by the "Run tests" step
+        uses: rlespinasse/github-slug-action@v4.4.1
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.9" # quoted so YAML never reads a version as a float (3.10 would become 3.1)
+      - name: Tailscale
+        uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
+        with:
+          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
+      - name: Prepare disks # format /dev/nvme1n1 as ext4 and mount it at DOCKER_VOLUME
+        run: |
+          sudo mkfs -t ext4 /dev/nvme1n1
+          sudo mkdir ${{ env.DOCKER_VOLUME }}
+          sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
+      - name: Install
+        run: |
+          make install-integration-tests
+      - name: Run tests
+        run: |
+          export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
+          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
+          pytest -s -vv integration-tests
+
   build-and-push-image-rocm:
     concurrency:
       group: ${{ github.workflow }}-build-and-push-image-rocm-${{ github.head_ref || github.run_id }}
       cancel-in-progress: true
-    needs: start-runner # required to start the main job when the runner is ready
+    needs:
+      - start-runner
+      - build-and-push-image # Wait for the main docker image to be built
+      - integration-tests # Wait for the main integration-tests
     runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
     permissions:
       contents: write
@@ -235,43 +274,6 @@ jobs:
           cache-from: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
           cache-to: type=registry,ref=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:cache-rocm,mode=min
 
-  integration-tests:
-    concurrency:
-      group: ${{ github.workflow }}-${{ github.job }}-${{ github.head_ref || github.run_id }}
-      cancel-in-progress: true
-    needs:
-      - start-runner
-      - build-and-push-image # Wait for the docker image to be built
-      - build-and-push-image-rocm
-    runs-on: ${{ needs.start-runner.outputs.label }} # run the job on the newly created runner
-    env:
-      DOCKER_VOLUME: /cache
-    steps:
-      - uses: actions/checkout@v2
-      - name: Inject slug/short variables
-        uses: rlespinasse/github-slug-action@v4.4.1
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: 3.9
-      - name: Tailscale
-        uses: tailscale/github-action@7bd8039bf25c23c4ab1b8d6e2cc2da2280601966
-        with:
-          authkey: ${{ secrets.TAILSCALE_AUTHKEY }}
-      - name: Prepare disks
-        run: |
-          sudo mkfs -t ext4 /dev/nvme1n1
-          sudo mkdir ${{ env.DOCKER_VOLUME }}
-          sudo mount /dev/nvme1n1 ${{ env.DOCKER_VOLUME }}
-      - name: Install
-        run: |
-          make install-integration-tests
-      - name: Run tests
-        run: |
-          export DOCKER_IMAGE=registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-${{ env.GITHUB_SHA_SHORT }}
-          export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
-          pytest -s -vv integration-tests
-
   stop-runner:
     name: Stop self-hosted EC2 runner
     needs:
diff --git a/.github/workflows/build_pr_documentation.yml b/.github/workflows/build_pr_documentation.yml
index b46216ec..a5ce39a5 100644
--- a/.github/workflows/build_pr_documentation.yml
+++ b/.github/workflows/build_pr_documentation.yml
@@ -16,4 +16,4 @@ jobs:
       commit_sha: ${{ github.event.pull_request.head.sha }}
       pr_number: ${{ github.event.number }}
       package: text-generation-inference
-      additional_args: --not_python_module 
+      additional_args: --not_python_module
diff --git a/.github/workflows/delete_doc_comment.yml b/.github/workflows/delete_doc_comment.yml
deleted file mode 100644
index 1cad807b..00000000
--- a/.github/workflows/delete_doc_comment.yml
+++ /dev/null
@@ -1,12 +0,0 @@
-name: Delete doc comment
-
-on:
-  pull_request:
-    types: [ closed ]
-
-
-jobs:
-  delete:
-    uses: huggingface/doc-builder/.github/workflows/delete_doc_comment_trigger.yml@main
-    with:
-      pr_number: ${{ github.event.number }}
\ No newline at end of file
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 00000000..a5e50a79
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,14 @@
+name: Close stale issues and PRs  # housekeeping workflow; the schedule below is its only trigger
+on:
+  schedule:
+    - cron: "30 1 * * *"  # once a day at 01:30 (GitHub evaluates schedules in UTC)
+
+jobs:
+  stale:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/stale@v8
+        with:
+          days-before-stale: 30  # days without activity before an item is marked stale
+          days-before-close: 5  # further days without activity before a stale item is closed
+          stale-issue-message: "This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days."
diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index 311ee6b9..29ff6d45 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -33,11 +33,18 @@ jobs:
       - name: Install Rust
         uses: actions-rs/toolchain@v1
         with:
-          toolchain: 1.71.0
+          # Released on: 28 December, 2023
+          # Branched from master on: 10 November, 2023
+          # https://releases.rs/docs/1.75.0/
+          toolchain: 1.75.0
           override: true
           components: rustfmt, clippy
       - name: Install Protoc
         uses: arduino/setup-protoc@v1
+      - name: Clean unused files # free disk space on the runner before the remaining steps
+        run: |
+          sudo rm -rf /usr/local/lib/android # reclaims about 10 GB; only safe because this job does not need Android
+          sudo rm -rf /usr/share/dotnet # reclaims about 20 GB; only safe because this job does not need .NET
       - name: Install sccache
         run: |
           curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache
@@ -68,12 +75,11 @@ jobs:
           pip install pytest
           export HUGGING_FACE_HUB_TOKEN=${{ secrets.HUGGING_FACE_HUB_TOKEN }}
           pytest -s -vv server/tests
-      - name: Run Rust fmt
+      - name: Pre-commit checks
         run: |
-          cargo fmt --check
-      - name: Run Rust clippy
-        run: |
-          cargo clippy
+          pip install pre-commit
+          pre-commit install
+          pre-commit run --all-files
       - name: Run Rust tests
         run: |
           cargo test
diff --git a/.github/workflows/upload_pr_documentation.yml b/.github/workflows/upload_pr_documentation.yml
index b984ead2..ae00bb51 100644
--- a/.github/workflows/upload_pr_documentation.yml
+++ b/.github/workflows/upload_pr_documentation.yml
@@ -13,4 +13,4 @@ jobs:
       package_name: text-generation-inference
     secrets:
       hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
-      comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
\ No newline at end of file
+      comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 20c9baee..b3ca772b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,12 @@
 target
 router/tokenizer.json
 *__pycache__*
+
+# ROCm auto-generated files
+*.hip
+server/exllamav2_kernels/exllamav2_kernels/hip/
+server/exllama_kernels/exllama_kernels/hip/
+server/exllama_kernels/exllama_kernels/hip_func/
+*_hip.cuh
+server/exllama_kernels/exllama_kernels/hip_buffers.cuh
+server/exllama_kernels/exllama_kernels/exllama_ext_hip.cpp
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..45bc07a5
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,18 @@
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks  # generic file hygiene hooks
+    rev: v4.5.0
+    hooks:
+      - id: check-yaml
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+        exclude: docs/source/basic_tutorials/launcher.md
+  - repo: https://github.com/psf/black  # Python formatter
+    rev: 24.2.0
+    hooks:
+      - id: black
+  - repo: https://github.com/doublify/pre-commit-rust  # Rust hooks: fmt, cargo-check, clippy
+    rev: v1.0
+    hooks:
+      - id: fmt
+      - id: cargo-check
+      - id: clippy
diff --git a/Cargo.lock b/Cargo.lock
index 04d42397..e5f47197 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -24,7 +24,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
 dependencies = [
  "cfg-if",
+ "getrandom",
  "once_cell",
+ "serde",
  "version_check",
  "zerocopy",
 ]
@@ -38,6 +40,12 @@ dependencies = [
  "memchr",
 ]
 
+[[package]]
+name = "aligned-vec"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4aa90d7ce82d4be67b64039a3d588d38dbcc6736577de4a847025ce5b0c468d1"
+
 [[package]]
 name = "anstream"
 version = "0.6.13"
@@ -88,9 +96,15 @@ dependencies = [
 
 [[package]]
 name = "anyhow"
-version = "1.0.81"
+version = "1.0.82"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247"
+checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519"
+
+[[package]]
+name = "arbitrary"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
 
 [[package]]
 name = "arc-swap"
@@ -98,6 +112,23 @@ version = "1.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
 
+[[package]]
+name = "arg_enum_proc_macro"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.60",
+]
+
+[[package]]
+name = "arrayvec"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
+
 [[package]]
 name = "async-rustls"
 version = "0.3.0"
@@ -128,18 +159,18 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
 name = "async-trait"
-version = "0.1.79"
+version = "0.1.80"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681"
+checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -148,6 +179,20 @@ version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80"
 
+[[package]]
+name = "av1-grain"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6678909d8c5d46a42abcf571271e15fdbc0a225e3646cf23762cd415046c78bf"
+dependencies = [
+ "anyhow",
+ "arrayvec",
+ "log",
+ "nom",
+ "num-rational",
+ "v_frame",
+]
+
 [[package]]
 name = "average"
 version = "0.14.2"
@@ -159,6 +204,15 @@ dependencies = [
  "num-traits",
 ]
 
+[[package]]
+name = "avif-serialize"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "876c75a42f6364451a033496a14c44bffe41f5f4a8236f697391f11024e596d2"
+dependencies = [
+ "arrayvec",
+]
+
 [[package]]
 name = "awaitdrop"
 version = "0.1.2"
@@ -265,6 +319,33 @@ version = "0.21.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
 
+[[package]]
+name = "base64"
+version = "0.22.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51"
+
+[[package]]
+name = "bit-set"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
+dependencies = [
+ "bit-vec",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
+
+[[package]]
+name = "bit_field"
+version = "0.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc827186963e592360843fb5ba4b973e145841266c1357f7180c43526f2e5b61"
+
 [[package]]
 name = "bitflags"
 version = "1.3.2"
@@ -277,6 +358,12 @@ version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
 
+[[package]]
+name = "bitstream-io"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06c9989a51171e2e81038ab168b6ae22886fe9ded214430dbb4f41c28cf176da"
+
 [[package]]
 name = "block-buffer"
 version = "0.10.4"
@@ -287,16 +374,28 @@ dependencies = [
 ]
 
 [[package]]
-name = "bumpalo"
-version = "3.15.4"
+name = "built"
+version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa"
+checksum = "41bfbdb21256b87a8b5e80fab81a8eed158178e812fd7ba451907518b2742f16"
+
+[[package]]
+name = "bumpalo"
+version = "3.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
 
 [[package]]
 name = "bytecount"
-version = "0.6.7"
+version = "0.6.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205"
+checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
+
+[[package]]
+name = "bytemuck"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15"
 
 [[package]]
 name = "byteorder"
@@ -304,6 +403,12 @@ version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
+[[package]]
+name = "byteorder-lite"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
+
 [[package]]
 name = "bytes"
 version = "1.6.0"
@@ -350,9 +455,24 @@ checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53"
 
 [[package]]
 name = "cc"
-version = "1.0.90"
+version = "1.0.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
+checksum = "d32a725bc159af97c3e629873bb9f88fb8cf8a4867175f76dc987815ea07c83b"
+dependencies = [
+ "jobserver",
+ "libc",
+ "once_cell",
+]
+
+[[package]]
+name = "cfg-expr"
+version = "0.15.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d067ad48b8650848b989a59a86c6c36a995d02d2bf778d45c3c5d57bc2718f02"
+dependencies = [
+ "smallvec",
+ "target-lexicon",
+]
 
 [[package]]
 name = "cfg-if"
@@ -397,7 +517,7 @@ dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -406,6 +526,12 @@ version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce"
 
+[[package]]
+name = "color_quant"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
+
 [[package]]
 name = "colorchoice"
 version = "1.0.0"
@@ -518,6 +644,12 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "crunchy"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
+
 [[package]]
 name = "crypto-common"
 version = "0.1.6"
@@ -675,9 +807,9 @@ dependencies = [
 
 [[package]]
 name = "either"
-version = "1.10.0"
+version = "1.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
+checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2"
 
 [[package]]
 name = "encode_unicode"
@@ -687,9 +819,9 @@ checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
 
 [[package]]
 name = "encoding_rs"
-version = "0.8.33"
+version = "0.8.34"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7268b386296a025e474d5140678f75d6de9493ae55a5d709eeb9dd08149945e1"
+checksum = "b45de904aa0b010bce2ab45264d0631681847fa7b6f2eaa7dab7619943bc4f59"
 dependencies = [
  "cfg-if",
 ]
@@ -719,12 +851,47 @@ dependencies = [
  "cc",
 ]
 
+[[package]]
+name = "exr"
+version = "1.72.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "887d93f60543e9a9362ef8a21beedd0a833c5d9610e18c67abe15a5963dcb1a4"
+dependencies = [
+ "bit_field",
+ "flume",
+ "half",
+ "lebe",
+ "miniz_oxide",
+ "rayon-core",
+ "smallvec",
+ "zune-inflate",
+]
+
+[[package]]
+name = "fancy-regex"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2"
+dependencies = [
+ "bit-set",
+ "regex",
+]
+
 [[package]]
 name = "fastrand"
 version = "2.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984"
 
+[[package]]
+name = "fdeflate"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f9bfee30e4dedf0ab8b422f03af778d9612b63f502710fc500a334ebe2de645"
+dependencies = [
+ "simd-adler32",
+]
+
 [[package]]
 name = "fixedbitset"
 version = "0.4.2"
@@ -753,6 +920,15 @@ version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "28a80e3145d8ad11ba0995949bbcf48b9df2be62772b3d351ef017dff6ecb853"
 
+[[package]]
+name = "flume"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181"
+dependencies = [
+ "spin 0.9.8",
+]
+
 [[package]]
 name = "fnv"
 version = "1.0.7"
@@ -783,6 +959,16 @@ dependencies = [
  "percent-encoding",
 ]
 
+[[package]]
+name = "fraction"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3027ae1df8d41b4bed2241c8fdad4acc1e7af60c8e17743534b545e77182d678"
+dependencies = [
+ "lazy_static",
+ "num",
+]
+
 [[package]]
 name = "futures"
 version = "0.3.30"
@@ -839,7 +1025,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -893,13 +1079,25 @@ dependencies = [
 
 [[package]]
 name = "getrandom"
-version = "0.2.12"
+version = "0.2.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
+checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c"
 dependencies = [
  "cfg-if",
+ "js-sys",
  "libc",
  "wasi",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "gif"
+version = "0.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3fb2d69b19215e18bb912fa30f7ce15846e301408695e44e0ef719f1da9e19f2"
+dependencies = [
+ "color_quant",
+ "weezl",
 ]
 
 [[package]]
@@ -920,9 +1118,9 @@ dependencies = [
 
 [[package]]
 name = "h2"
-version = "0.3.25"
+version = "0.3.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fbd2820c5e49886948654ab546d0688ff24530286bdcf8fca3cefb16d4618eb"
+checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8"
 dependencies = [
  "bytes",
  "fnv",
@@ -937,6 +1135,16 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "half"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.12.3"
@@ -983,25 +1191,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2b780635574b3d92f036890d8373433d6f9fc7abb320ee42a5c25897fc8ed732"
 dependencies = [
  "dirs 5.0.1",
+ "futures",
  "indicatif",
  "log",
  "native-tls",
+ "num_cpus",
  "rand",
+ "reqwest",
  "serde",
  "serde_json",
  "thiserror",
+ "tokio",
  "ureq",
 ]
 
-[[package]]
-name = "home"
-version = "0.5.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3d1354bf6b7235cb4a0576c2619fd4ed18183f689b12b006a0ee7329eeff9a5"
-dependencies = [
- "windows-sys 0.52.0",
-]
-
 [[package]]
 name = "hostname"
 version = "0.3.1"
@@ -1118,6 +1321,45 @@ dependencies = [
  "unicode-normalization",
 ]
 
+[[package]]
+name = "image"
+version = "0.25.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd54d660e773627692c524beaad361aca785a4f9f5730ce91f42aabe5bce3d11"
+dependencies = [
+ "bytemuck",
+ "byteorder",
+ "color_quant",
+ "exr",
+ "gif",
+ "image-webp",
+ "num-traits",
+ "png",
+ "qoi",
+ "ravif",
+ "rayon",
+ "rgb",
+ "tiff",
+ "zune-core",
+ "zune-jpeg",
+]
+
+[[package]]
+name = "image-webp"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d730b085583c4d789dfd07fdcf185be59501666a90c97c40162b37e4fdad272d"
+dependencies = [
+ "byteorder-lite",
+ "thiserror",
+]
+
+[[package]]
+name = "imgref"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44feda355f4159a7c757171a77de25daf6411e217b4cabd03bd6650690468126"
+
 [[package]]
 name = "indexmap"
 version = "1.9.3"
@@ -1180,12 +1422,32 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "interpolate_name"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.60",
+]
+
 [[package]]
 name = "ipnet"
 version = "2.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3"
 
+[[package]]
+name = "iso8601"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "924e5d73ea28f59011fec52a0d12185d496a9b075d360657aed2a5707f701153"
+dependencies = [
+ "nom",
+]
+
 [[package]]
 name = "itertools"
 version = "0.10.5"
@@ -1204,12 +1466,36 @@ dependencies = [
  "either",
 ]
 
+[[package]]
+name = "itertools"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
 
+[[package]]
+name = "jobserver"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "jpeg-decoder"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f5d4a7da358eff58addd2877a45865158f0d78c911d43a5784ceb7bbf52833b0"
+
 [[package]]
 name = "js-sys"
 version = "0.3.69"
@@ -1219,18 +1505,65 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "jsonschema"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a071f4f7efc9a9118dfb627a0a94ef247986e1ab8606a4c806ae2b3aa3b6978"
+dependencies = [
+ "ahash",
+ "anyhow",
+ "base64 0.21.7",
+ "bytecount",
+ "clap",
+ "fancy-regex",
+ "fraction",
+ "getrandom",
+ "iso8601",
+ "itoa",
+ "memchr",
+ "num-cmp",
+ "once_cell",
+ "parking_lot",
+ "percent-encoding",
+ "regex",
+ "reqwest",
+ "serde",
+ "serde_json",
+ "time",
+ "url",
+ "uuid",
+]
+
 [[package]]
 name = "lazy_static"
 version = "1.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
+[[package]]
+name = "lebe"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03087c2bad5e1034e8cace5926dec053fb3790248370865f5117a7d0213354c8"
+
 [[package]]
 name = "libc"
 version = "0.2.153"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
 
+[[package]]
+name = "libfuzzer-sys"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7"
+dependencies = [
+ "arbitrary",
+ "cc",
+ "once_cell",
+]
+
 [[package]]
 name = "libm"
 version = "0.2.8"
@@ -1269,6 +1602,15 @@ version = "0.4.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
 
+[[package]]
+name = "loop9"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062"
+dependencies = [
+ "imgref",
+]
+
 [[package]]
 name = "mach2"
 version = "0.4.2"
@@ -1315,6 +1657,16 @@ version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
 
+[[package]]
+name = "maybe-rayon"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
+dependencies = [
+ "cfg-if",
+ "rayon",
+]
+
 [[package]]
 name = "memchr"
 version = "2.7.2"
@@ -1358,7 +1710,7 @@ checksum = "38b4faf00617defe497754acde3024865bc143d44a86799b24e191ecff91354f"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -1392,6 +1744,14 @@ dependencies = [
  "unicase",
 ]
 
+[[package]]
+name = "minijinja"
+version = "1.0.12"
+source = "git+https://github.com/mitsuhiko/minijinja.git?rev=5cd4efb#5cd4efb9e2639247df275fe6e22a5dbe0ce71b28"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "minimal-lexical"
 version = "0.2.1"
@@ -1405,6 +1765,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
 dependencies = [
  "adler",
+ "simd-adler32",
 ]
 
 [[package]]
@@ -1421,9 +1782,9 @@ dependencies = [
 
 [[package]]
 name = "monostate"
-version = "0.1.11"
+version = "0.1.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "878c2a1f1c70e5724fa28f101ca787b6a7e8ad5c5e4ae4ca3b0fa4a419fa9075"
+checksum = "a20fffcd8ca4c69d31e036a71abc400147b41f90895df4edcb36497a1f8af8bf"
 dependencies = [
  "monostate-impl",
  "serde",
@@ -1431,20 +1792,20 @@ dependencies = [
 
 [[package]]
 name = "monostate-impl"
-version = "0.1.11"
+version = "0.1.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f686d68a09079e63b1d2c64aa305095887ce50565f00a922ebfaeeee0d9ba6ce"
+checksum = "bf307cbbbd777a9c10cec88ddafee572b3484caad5cce0c9236523c3803105a6"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
 name = "multimap"
-version = "0.8.3"
+version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a"
+checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03"
 
 [[package]]
 name = "muxado"
@@ -1483,6 +1844,12 @@ dependencies = [
  "tempfile",
 ]
 
+[[package]]
+name = "new_debug_unreachable"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
+
 [[package]]
 name = "ngrok"
 version = "0.13.1"
@@ -1542,6 +1909,12 @@ dependencies = [
  "minimal-lexical",
 ]
 
+[[package]]
+name = "noop_proc_macro"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8"
+
 [[package]]
 name = "ntapi"
 version = "0.4.1"
@@ -1561,12 +1934,95 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "num"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3135b08af27d103b0a51f2ae0f8632117b7b185ccf931445affa8df530576a41"
+dependencies = [
+ "num-bigint",
+ "num-complex",
+ "num-integer",
+ "num-iter",
+ "num-rational",
+ "num-traits",
+]
+
+[[package]]
+name = "num-bigint"
+version = "0.4.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-cmp"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa"
+
+[[package]]
+name = "num-complex"
+version = "0.4.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6"
+dependencies = [
+ "num-traits",
+]
+
 [[package]]
 name = "num-conv"
 version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
 
+[[package]]
+name = "num-derive"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.60",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-iter"
+version = "0.1.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0"
+dependencies = [
+ "autocfg",
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
 [[package]]
 name = "num-traits"
 version = "0.2.18"
@@ -1662,7 +2118,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -1878,7 +2334,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -1899,6 +2355,19 @@ version = "0.3.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
 
+[[package]]
+name = "png"
+version = "0.17.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "06e4b0d3d1312775e782c86c91a111aa1f910cbb65e1337f9975b5f9a554b5e1"
+dependencies = [
+ "bitflags 1.3.2",
+ "crc32fast",
+ "fdeflate",
+ "flate2",
+ "miniz_oxide",
+]
+
 [[package]]
 name = "portable-atomic"
 version = "1.6.0"
@@ -1919,12 +2388,12 @@ checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
 
 [[package]]
 name = "prettyplease"
-version = "0.2.17"
+version = "0.2.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7"
+checksum = "5ac2cf0f2e4f42b49f5ffd07dae8d746508ef7526c13940e5f524012ae6c6550"
 dependencies = [
  "proc-macro2",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -1953,13 +2422,32 @@ dependencies = [
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.79"
+version = "1.0.81"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
+checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba"
 dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "profiling"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d84d1d7a6ac92673717f9f6d1518374ef257669c24ebc5ac25d5033828be58"
+dependencies = [
+ "profiling-procmacros",
+]
+
+[[package]]
+name = "profiling-procmacros"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8021cf59c8ec9c432cfc2526ac6b8aa508ecaf29cd415f271b8406c1b851c3fd"
+dependencies = [
+ "quote",
+ "syn 2.0.60",
+]
+
 [[package]]
 name = "prost"
 version = "0.11.9"
@@ -1972,34 +2460,33 @@ dependencies = [
 
 [[package]]
 name = "prost"
-version = "0.12.3"
+version = "0.12.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a"
+checksum = "d0f5d036824e4761737860779c906171497f6d55681139d8312388f8fe398922"
 dependencies = [
  "bytes",
- "prost-derive 0.12.3",
+ "prost-derive 0.12.4",
 ]
 
 [[package]]
 name = "prost-build"
-version = "0.12.3"
+version = "0.12.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c55e02e35260070b6f716a2423c2ff1c3bb1642ddca6f99e1f26d06268a0e2d2"
+checksum = "80b776a1b2dc779f5ee0641f8ade0125bc1298dd41a9a0c16d8bd57b42d222b1"
 dependencies = [
  "bytes",
- "heck 0.4.1",
- "itertools 0.11.0",
+ "heck 0.5.0",
+ "itertools 0.12.1",
  "log",
  "multimap",
  "once_cell",
  "petgraph",
  "prettyplease",
- "prost 0.12.3",
+ "prost 0.12.4",
  "prost-types",
  "regex",
- "syn 2.0.58",
+ "syn 2.0.60",
  "tempfile",
- "which",
 ]
 
 [[package]]
@@ -2017,24 +2504,33 @@ dependencies = [
 
 [[package]]
 name = "prost-derive"
-version = "0.12.3"
+version = "0.12.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e"
+checksum = "19de2de2a00075bf566bee3bd4db014b11587e84184d3f7a791bc17f1a8e9e48"
 dependencies = [
  "anyhow",
- "itertools 0.11.0",
+ "itertools 0.12.1",
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
 name = "prost-types"
-version = "0.12.3"
+version = "0.12.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "193898f59edcf43c26227dcd4c8427f00d99d61e95dcde58dabd49fa291d470e"
+checksum = "3235c33eb02c1f1e212abdbe34c78b264b038fb58ca612664343271e36e55ffe"
 dependencies = [
- "prost 0.12.3",
+ "prost 0.12.4",
+]
+
+[[package]]
+name = "qoi"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001"
+dependencies = [
+ "bytemuck",
 ]
 
 [[package]]
@@ -2054,10 +2550,16 @@ dependencies = [
 ]
 
 [[package]]
-name = "quote"
-version = "1.0.35"
+name = "quick-error"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef"
+checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
+
+[[package]]
+name = "quote"
+version = "1.0.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
 dependencies = [
  "proc-macro2",
 ]
@@ -2109,6 +2611,56 @@ dependencies = [
  "unicode-width",
 ]
 
+[[package]]
+name = "rav1e"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd87ce80a7665b1cce111f8a16c1f3929f6547ce91ade6addf4ec86a8dda5ce9"
+dependencies = [
+ "arbitrary",
+ "arg_enum_proc_macro",
+ "arrayvec",
+ "av1-grain",
+ "bitstream-io",
+ "built",
+ "cfg-if",
+ "interpolate_name",
+ "itertools 0.12.1",
+ "libc",
+ "libfuzzer-sys",
+ "log",
+ "maybe-rayon",
+ "new_debug_unreachable",
+ "noop_proc_macro",
+ "num-derive",
+ "num-traits",
+ "once_cell",
+ "paste",
+ "profiling",
+ "rand",
+ "rand_chacha",
+ "simd_helpers",
+ "system-deps",
+ "thiserror",
+ "v_frame",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "ravif"
+version = "0.11.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bc13288f5ab39e6d7c9d501759712e6969fcc9734220846fc9ed26cae2cc4234"
+dependencies = [
+ "avif-serialize",
+ "imgref",
+ "loop9",
+ "quick-error",
+ "rav1e",
+ "rayon",
+ "rgb",
+]
+
 [[package]]
 name = "raw-cpuid"
 version = "10.7.0"
@@ -2259,6 +2811,15 @@ dependencies = [
  "winreg",
 ]
 
+[[package]]
+name = "rgb"
+version = "0.8.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8"
+dependencies = [
+ "bytemuck",
+]
+
 [[package]]
 name = "ring"
 version = "0.16.20"
@@ -2310,7 +2871,7 @@ dependencies = [
  "quote",
  "rust-embed-utils",
  "shellexpand",
- "syn 2.0.58",
+ "syn 2.0.60",
  "walkdir",
 ]
 
@@ -2341,9 +2902,9 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.32"
+version = "0.38.34"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89"
+checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f"
 dependencies = [
  "bitflags 2.5.0",
  "errno",
@@ -2366,9 +2927,9 @@ dependencies = [
 
 [[package]]
 name = "rustls"
-version = "0.22.3"
+version = "0.22.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99008d7ad0bbbea527ec27bddbc0e432c5b87d8175178cee68d2eec9c4a1813c"
+checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432"
 dependencies = [
  "log",
  "ring 0.17.8",
@@ -2389,15 +2950,15 @@ dependencies = [
 
 [[package]]
 name = "rustls-pki-types"
-version = "1.4.1"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ecd36cc4259e3e4514335c4a138c6b43171a8d61d8f5c9348f9fc7529416f247"
+checksum = "beb461507cee2c2ff151784c52762cf4d9ff6a61f3e80968600ed24fa837fa54"
 
 [[package]]
 name = "rustls-webpki"
-version = "0.102.2"
+version = "0.102.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610"
+checksum = "f3bce581c0dd41bce533ce695a1437fa16a7ab5ac3ccfa99fe1a620a7885eabf"
 dependencies = [
  "ring 0.17.8",
  "rustls-pki-types",
@@ -2406,9 +2967,9 @@ dependencies = [
 
 [[package]]
 name = "rustversion"
-version = "1.0.14"
+version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4"
+checksum = "80af6f9131f277a45a3fba6ce8e2258037bb0477a67e610d3c1fe046ab31de47"
 
 [[package]]
 name = "ryu"
@@ -2484,29 +3045,29 @@ dependencies = [
 
 [[package]]
 name = "serde"
-version = "1.0.197"
+version = "1.0.198"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
+checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.197"
+version = "1.0.198"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
+checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.115"
+version = "1.0.116"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd"
+checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813"
 dependencies = [
  "itoa",
  "ryu",
@@ -2523,6 +3084,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "serde_spanned"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb3622f419d1296904700073ea6cc23ad690adbd66f13ea683df73298736f0c1"
+dependencies = [
+ "serde",
+]
+
 [[package]]
 name = "serde_urlencoded"
 version = "0.7.1"
@@ -2587,13 +3157,28 @@ dependencies = [
 
 [[package]]
 name = "signal-hook-registry"
-version = "1.4.1"
+version = "1.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1"
+checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
 dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "simd-adler32"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
+
+[[package]]
+name = "simd_helpers"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6"
+dependencies = [
+ "quote",
+]
+
 [[package]]
 name = "sketches-ddsketch"
 version = "0.2.2"
@@ -2645,6 +3230,9 @@ name = "spin"
 version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
+dependencies = [
+ "lock_api",
+]
 
 [[package]]
 name = "spm_precompiled"
@@ -2689,7 +3277,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -2711,9 +3299,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.58"
+version = "2.0.60"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687"
+checksum = "909518bc7b1c9b779f1bbf07f2929d35af9f0f37e47c6e9ef7f9dddc1e1821f3"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2728,9 +3316,9 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
 
 [[package]]
 name = "sysinfo"
-version = "0.30.8"
+version = "0.30.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b1a378e48fb3ce3a5cf04359c456c9c98ff689bcf1c1bc6e6a31f247686f275"
+checksum = "87341a165d73787554941cd5ef55ad728011566fe714e987d1b976c15dbc3a83"
 dependencies = [
  "cfg-if",
  "core-foundation-sys",
@@ -2761,6 +3349,19 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "system-deps"
+version = "6.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a3e535eb8dded36d55ec13eddacd30dec501792ff23a0b1682c38601b8cf2349"
+dependencies = [
+ "cfg-expr",
+ "heck 0.5.0",
+ "pkg-config",
+ "toml",
+ "version-compare",
+]
+
 [[package]]
 name = "tabled"
 version = "0.14.0"
@@ -2785,6 +3386,12 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "target-lexicon"
+version = "0.12.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f"
+
 [[package]]
 name = "tempfile"
 version = "3.10.1"
@@ -2799,7 +3406,7 @@ dependencies = [
 
 [[package]]
 name = "text-generation-benchmark"
-version = "1.2.0"
+version = "2.0.0"
 dependencies = [
  "average",
  "clap",
@@ -2812,7 +3419,7 @@ dependencies = [
  "tabled",
  "text-generation-client",
  "thiserror",
- "tokenizers",
+ "tokenizers 0.14.1",
  "tokio",
  "tracing",
  "tracing-subscriber",
@@ -2820,11 +3427,11 @@ dependencies = [
 
 [[package]]
 name = "text-generation-client"
-version = "1.2.0"
+version = "2.0.0"
 dependencies = [
  "futures",
  "grpc-metadata",
- "prost 0.12.3",
+ "prost 0.12.4",
  "prost-build",
  "rand",
  "thiserror",
@@ -2837,12 +3444,14 @@ dependencies = [
 
 [[package]]
 name = "text-generation-launcher"
-version = "1.2.0"
+version = "2.0.0"
 dependencies = [
  "clap",
  "ctrlc",
  "float_eq",
+ "hf-hub",
  "nix",
+ "once_cell",
  "reqwest",
  "serde",
  "serde_json",
@@ -2853,28 +3462,35 @@ dependencies = [
 
 [[package]]
 name = "text-generation-router"
-version = "1.2.0"
+version = "2.0.0"
 dependencies = [
  "async-stream",
  "axum",
  "axum-tracing-opentelemetry",
+ "base64 0.22.0",
  "clap",
  "futures",
+ "futures-util",
  "hf-hub",
+ "image",
  "init-tracing-opentelemetry",
+ "jsonschema",
  "metrics",
  "metrics-exporter-prometheus",
+ "minijinja",
  "ngrok",
  "nohash-hasher",
+ "once_cell",
  "opentelemetry",
  "opentelemetry-otlp",
  "rand",
+ "regex",
  "reqwest",
  "serde",
  "serde_json",
  "text-generation-client",
  "thiserror",
- "tokenizers",
+ "tokenizers 0.15.2",
  "tokio",
  "tokio-stream",
  "tower-http",
@@ -2888,22 +3504,22 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.58"
+version = "1.0.59"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
+checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.58"
+version = "1.0.59"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
+checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -2917,10 +3533,21 @@ dependencies = [
 ]
 
 [[package]]
-name = "time"
-version = "0.3.34"
+name = "tiff"
+version = "0.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749"
+checksum = "ba1310fcea54c6a9a4fd1aad794ecc02c31682f6bfbecdf460bf19533eed1e3e"
+dependencies = [
+ "flate2",
+ "jpeg-decoder",
+ "weezl",
+]
+
+[[package]]
+name = "time"
+version = "0.3.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885"
 dependencies = [
  "deranged",
  "itoa",
@@ -2941,9 +3568,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
 
 [[package]]
 name = "time-macros"
-version = "0.2.17"
+version = "0.2.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774"
+checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf"
 dependencies = [
  "num-conv",
  "time-core",
@@ -2998,6 +3625,40 @@ dependencies = [
  "unicode_categories",
 ]
 
+[[package]]
+name = "tokenizers"
+version = "0.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3dd47962b0ba36e7fd33518fbf1754d136fd1474000162bbf2a8b5fcb2d3654d"
+dependencies = [
+ "aho-corasick",
+ "clap",
+ "derive_builder",
+ "esaxx-rs",
+ "getrandom",
+ "hf-hub",
+ "indicatif",
+ "itertools 0.12.1",
+ "lazy_static",
+ "log",
+ "macro_rules_attribute",
+ "monostate",
+ "onig",
+ "paste",
+ "rand",
+ "rayon",
+ "rayon-cond",
+ "regex",
+ "regex-syntax 0.8.3",
+ "serde",
+ "serde_json",
+ "spm_precompiled",
+ "thiserror",
+ "unicode-normalization-alignments",
+ "unicode-segmentation",
+ "unicode_categories",
+]
+
 [[package]]
 name = "tokio"
 version = "1.37.0"
@@ -3035,7 +3696,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -3085,6 +3746,40 @@ dependencies = [
  "tracing",
 ]
 
+[[package]]
+name = "toml"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3"
+dependencies = [
+ "serde",
+ "serde_spanned",
+ "toml_datetime",
+ "toml_edit",
+]
+
+[[package]]
+name = "toml_datetime"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3550f4e9685620ac18a50ed434eb3aec30db8ba93b0287467bca5826ea25baf1"
+dependencies = [
+ "serde",
+]
+
+[[package]]
+name = "toml_edit"
+version = "0.22.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3328d4f68a705b2a4498da1d580585d39a6510f98318a2cec3018a7ec61ddef"
+dependencies = [
+ "indexmap 2.2.6",
+ "serde",
+ "serde_spanned",
+ "toml_datetime",
+ "winnow",
+]
+
 [[package]]
 name = "tonic"
 version = "0.9.2"
@@ -3131,7 +3826,7 @@ dependencies = [
  "hyper-timeout",
  "percent-encoding",
  "pin-project",
- "prost 0.12.3",
+ "prost 0.12.4",
  "tokio",
  "tokio-stream",
  "tower",
@@ -3150,7 +3845,7 @@ dependencies = [
  "proc-macro2",
  "prost-build",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -3223,7 +3918,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -3401,16 +4096,16 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
 
 [[package]]
 name = "ureq"
-version = "2.9.6"
+version = "2.9.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11f214ce18d8b2cbe84ed3aa6486ed3f5b285cf8d8fbdbce9f3f767a724adc35"
+checksum = "d11a831e3c0b56e438a28308e7c810799e3c118417f342d30ecec080105395cd"
 dependencies = [
- "base64 0.21.7",
+ "base64 0.22.0",
  "flate2",
  "log",
  "native-tls",
  "once_cell",
- "rustls 0.22.3",
+ "rustls 0.22.4",
  "rustls-pki-types",
  "rustls-webpki",
  "serde",
@@ -3464,7 +4159,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "regex",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -3483,6 +4178,23 @@ dependencies = [
  "zip",
 ]
 
+[[package]]
+name = "uuid"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0"
+
+[[package]]
+name = "v_frame"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6f32aaa24bacd11e488aa9ba66369c7cd514885742c9fe08cfe85884db3e92b"
+dependencies = [
+ "aligned-vec",
+ "num-traits",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "valuable"
 version = "0.1.0"
@@ -3511,6 +4223,12 @@ dependencies = [
  "time",
 ]
 
+[[package]]
+name = "version-compare"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "852e951cb7832cb45cb1169900d19760cfa39b82bc0ea9c0e5a14ae88411c98b"
+
 [[package]]
 name = "version_check"
 version = "0.9.4"
@@ -3563,7 +4281,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
  "wasm-bindgen-shared",
 ]
 
@@ -3597,7 +4315,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -3638,16 +4356,10 @@ dependencies = [
 ]
 
 [[package]]
-name = "which"
-version = "4.4.2"
+name = "weezl"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
-dependencies = [
- "either",
- "home",
- "once_cell",
- "rustix",
-]
+checksum = "53a85b86a771b1c87058196170769dd264f66c0782acf1ae6cc51bfd64b39082"
 
 [[package]]
 name = "winapi"
@@ -3667,11 +4379,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 
 [[package]]
 name = "winapi-util"
-version = "0.1.6"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
+checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
 dependencies = [
- "winapi",
+ "windows-sys 0.52.0",
 ]
 
 [[package]]
@@ -3687,7 +4399,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
 dependencies = [
  "windows-core",
- "windows-targets 0.52.4",
+ "windows-targets 0.52.5",
 ]
 
 [[package]]
@@ -3696,7 +4408,7 @@ version = "0.52.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
 dependencies = [
- "windows-targets 0.52.4",
+ "windows-targets 0.52.5",
 ]
 
 [[package]]
@@ -3723,7 +4435,7 @@ version = "0.52.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
 dependencies = [
- "windows-targets 0.52.4",
+ "windows-targets 0.52.5",
 ]
 
 [[package]]
@@ -3758,17 +4470,18 @@ dependencies = [
 
 [[package]]
 name = "windows-targets"
-version = "0.52.4"
+version = "0.52.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
+checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
 dependencies = [
- "windows_aarch64_gnullvm 0.52.4",
- "windows_aarch64_msvc 0.52.4",
- "windows_i686_gnu 0.52.4",
- "windows_i686_msvc 0.52.4",
- "windows_x86_64_gnu 0.52.4",
- "windows_x86_64_gnullvm 0.52.4",
- "windows_x86_64_msvc 0.52.4",
+ "windows_aarch64_gnullvm 0.52.5",
+ "windows_aarch64_msvc 0.52.5",
+ "windows_i686_gnu 0.52.5",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc 0.52.5",
+ "windows_x86_64_gnu 0.52.5",
+ "windows_x86_64_gnullvm 0.52.5",
+ "windows_x86_64_msvc 0.52.5",
 ]
 
 [[package]]
@@ -3785,9 +4498,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
 
 [[package]]
 name = "windows_aarch64_gnullvm"
-version = "0.52.4"
+version = "0.52.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
+checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
 
 [[package]]
 name = "windows_aarch64_msvc"
@@ -3803,9 +4516,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
 
 [[package]]
 name = "windows_aarch64_msvc"
-version = "0.52.4"
+version = "0.52.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
+checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
 
 [[package]]
 name = "windows_i686_gnu"
@@ -3821,9 +4534,15 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
 
 [[package]]
 name = "windows_i686_gnu"
-version = "0.52.4"
+version = "0.52.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
+checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
 
 [[package]]
 name = "windows_i686_msvc"
@@ -3839,9 +4558,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
 
 [[package]]
 name = "windows_i686_msvc"
-version = "0.52.4"
+version = "0.52.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
+checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
 
 [[package]]
 name = "windows_x86_64_gnu"
@@ -3857,9 +4576,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
 
 [[package]]
 name = "windows_x86_64_gnu"
-version = "0.52.4"
+version = "0.52.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
+checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
 
 [[package]]
 name = "windows_x86_64_gnullvm"
@@ -3875,9 +4594,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
 
 [[package]]
 name = "windows_x86_64_gnullvm"
-version = "0.52.4"
+version = "0.52.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
+checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
 
 [[package]]
 name = "windows_x86_64_msvc"
@@ -3893,9 +4612,18 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
 
 [[package]]
 name = "windows_x86_64_msvc"
-version = "0.52.4"
+version = "0.52.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
+checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
+
+[[package]]
+name = "winnow"
+version = "0.6.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0c976aaaa0e1f90dbb21e9587cdaf1d9679a1cde8875c0d6bd83ab96a208352"
+dependencies = [
+ "memchr",
+]
 
 [[package]]
 name = "winreg"
@@ -3924,7 +4652,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.58",
+ "syn 2.0.60",
 ]
 
 [[package]]
@@ -3944,3 +4672,27 @@ dependencies = [
  "crossbeam-utils",
  "flate2",
 ]
+
+[[package]]
+name = "zune-core"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a"
+
+[[package]]
+name = "zune-inflate"
+version = "0.2.54"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02"
+dependencies = [
+ "simd-adler32",
+]
+
+[[package]]
+name = "zune-jpeg"
+version = "0.4.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec866b44a2a1fd6133d363f073ca1b179f438f99e7e5bfb1e33f7181facfe448"
+dependencies = [
+ "zune-core",
+]
diff --git a/Cargo.toml b/Cargo.toml
index 3677d2b6..ecb4878f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,7 +9,7 @@ members = [
 resolver = "2"
 
 [workspace.package]
-version = "1.2.0"
+version = "2.0.0"
 edition = "2021"
 authors = ["Olivier Dehaene"]
 homepage = "https://github.com/huggingface/text-generation-inference"
@@ -17,5 +17,7 @@ homepage = "https://github.com/huggingface/text-generation-inference"
 [profile.release]
 debug = 1
 incremental = true
-lto = "off"
+lto = "fat"
+opt-level = 3
+codegen-units = 1
 panic = "abort"
diff --git a/Dockerfile b/Dockerfile
index 481bfb2a..c7c65769 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -31,7 +31,7 @@ COPY launcher launcher
 RUN cargo build --release
 
 # Text Generation Inference base image
-FROM vault.habana.ai/gaudi-docker/1.14.0/ubuntu22.04/habanalabs/pytorch-installer-2.1.1:latest as base
+FROM vault.habana.ai/gaudi-docker/1.15.0/ubuntu22.04/habanalabs/pytorch-installer-2.2.0:latest as base
 
 # Text Generation Inference base env
 ENV HUGGINGFACE_HUB_CACHE=/data \
@@ -58,8 +58,8 @@ COPY server/Makefile server/Makefile
 RUN cd server && \
     make gen-server && \
     pip install -r requirements.txt && \
-    bash ./dill-0.3.7-patch.sh && \
-    pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.14.0 && \
+    bash ./dill-0.3.8-patch.sh && \
+    pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.15.0 && \
     pip install . --no-cache-dir
 
 # Install benchmarker
@@ -72,5 +72,7 @@ COPY --from=builder /usr/src/target/release/text-generation-launcher /usr/local/
 # Final image
 FROM base
 
-ENTRYPOINT ["text-generation-launcher"]
+COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh
+
+ENTRYPOINT ["/tgi-entrypoint.sh"]
 CMD ["--json-output"]
diff --git a/Dockerfile_amd b/Dockerfile_amd
index dd331a5d..fb820116 100644
--- a/Dockerfile_amd
+++ b/Dockerfile_amd
@@ -1,5 +1,5 @@
 # Rust builder
-FROM lukemathwalker/cargo-chef:latest-rust-1.71 AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.75 AS chef
 WORKDIR /usr/src
 
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -36,7 +36,7 @@ COPY launcher launcher
 RUN cargo build --release
 
 # Text Generation Inference base image for RoCm
-FROM rocm/dev-ubuntu-20.04:5.7 as base
+FROM rocm/dev-ubuntu-22.04:5.7 as base
 
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
     build-essential \
@@ -75,8 +75,8 @@ RUN chmod +x ~/mambaforge.sh && \
     mamba init && \
     rm ~/mambaforge.sh
 
-# Install PyTorch nightly (2.2.0.dev2023) compiled against RoCm 5.7, as VLLM can not be compiled with RoCm 5.6.
-RUN pip install --pre torch==2.2.0.dev20231106 --index-url https://download.pytorch.org/whl/nightly/rocm5.7
+# Install PyTorch 2.2 RC compiled against RoCm 5.7, as VLLM can not be compiled with RoCm 5.6.
+RUN pip install torch --index-url https://download.pytorch.org/whl/test/rocm5.7/
 
 FROM base AS kernel-builder
 
@@ -104,6 +104,20 @@ WORKDIR /usr/src
 COPY server/custom_kernels/ .
 RUN PYTORCH_ROCM_ARCH=gfx90a python setup.py build
 
+# Build exllama kernels
+FROM kernel-builder as exllama-kernels-builder
+WORKDIR /usr/src
+COPY server/exllama_kernels/ .
+
+RUN PYTORCH_ROCM_ARCH="gfx90a" python setup.py build
+
+# Build exllama v2 kernels
+FROM kernel-builder as exllamav2-kernels-builder
+WORKDIR /usr/src
+COPY server/exllamav2_kernels/ .
+
+RUN PYTORCH_ROCM_ARCH="gfx90a" python setup.py build
+
 FROM base as base-copy
 
 # Text Generation Inference base env
@@ -120,6 +134,12 @@ COPY --from=flash-att-v2-builder /usr/src/flash-attention-v2/build/lib.linux-x86
 # Copy build artifacts from custom kernels builder
 COPY --from=custom-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
 
+# Copy build artifacts from exllama kernels builder
+COPY --from=exllama-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+
+# Copy build artifacts from exllamav2 kernels builder
+COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+
 # Install flash-attention dependencies
 RUN pip install einops --no-cache-dir
 
@@ -130,7 +150,7 @@ COPY server/Makefile server/Makefile
 RUN cd server && \
     make gen-server && \
     pip install -r requirements_rocm.txt && \
-    pip install ".[accelerate, peft]" --no-cache-dir
+    pip install ".[accelerate, peft, outlines]" --no-cache-dir
 
 # Install benchmarker
 COPY --from=builder /usr/src/target/release/text-generation-benchmark /usr/local/bin/text-generation-benchmark
diff --git a/LICENSE b/LICENSE
index 19a34fcf..7d0e8034 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,181 +1,201 @@
-Hugging Face Optimized Inference License 1.0 (HFOILv1.0)
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
 
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
 
-This License Agreement governs the use of the Software and its Modifications. It is a
-binding agreement between the Licensor and You.
+   1. Definitions.
 
-This License Agreement shall be referred to as Hugging Face Optimized Inference License
-1.0 or HFOILv1.0. We may publish revised versions of this License Agreement from time to
-time. Each version will be given a distinguished number.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
 
-By downloading, accessing, modifying, distributing or otherwise using the Software, You
-consent to all of the terms and conditions below. So, if You do not agree with those,
-please do not download, access, modify, distribute, or use the Software.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
 
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
 
-1. PERMISSIONS
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
 
-You may use, modify and distribute the Software pursuant to the following terms and
-conditions:
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
 
-Copyright License. Subject to the terms and conditions of this License Agreement and where
-and as applicable, each Contributor hereby grants You a perpetual, worldwide,
-non-exclusive, royalty-free, copyright license to reproduce, prepare, publicly display,
-publicly perform, sublicense under the terms herein, and distribute the Software and
-Modifications of the Software.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
 
-Patent License. Subject to the terms and conditions of this License Agreement and where
-and as applicable, each Contributor hereby grants You a perpetual, worldwide,
-non-exclusive, royalty-free patent license to make, have made, Use, offer to sell, sell,
-import, and otherwise transfer the Software, where such license applies only to those
-patent claims licensable by such Contributor that are necessarily infringed by their
-Contribution(s) alone or by combination of their Contribution(s) with the Software to
-which such Contribution(s) was submitted. If You institute patent litigation against any
-entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Software
-or a Contribution incorporated within the Software constitutes direct or contributory
-patent infringement, then any rights granted to You under this License Agreement for the
-Software shall terminate as of the date such litigation is filed.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
 
-No other rights. All rights not expressly granted herein are retained.
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
 
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
 
-2. RESTRICTIONS
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
 
-You may not distribute the Software as a hosted or managed, and paid service, where the
-service grants users access to any substantial set of the features or functionality of the
-Software. If you wish to do so, You will need to be granted additional rights from the
-Licensor which will be subject to a separate mutually agreed agreement.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
 
-You may not sublicense the Software under any other terms than those listed in this
-License.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
 
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
 
-3. OBLIGATIONS
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
 
-When You modify the Software, You agree to: - attach a notice stating the Modifications of
-the Software You made; and - attach a notice stating that the Modifications of the
-Software are released under this License Agreement.
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
 
-When You distribute the Software or Modifications of the Software, You agree to: - give
-any recipients of the Software a copy of this License Agreement; - retain all Explanatory
-Documentation; and if sharing the Modifications of the Software, add Explanatory
-Documentation documenting the changes made to create the Modifications of the Software; -
-retain all copyright, patent, trademark and attribution notices.
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
 
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
 
-4. MISCELLANEOUS
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
 
-Termination. Licensor reserves the right to restrict Use of the Software in violation of
-this License Agreement, upon which Your licenses will automatically terminate.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
 
-Contributions. Unless You explicitly state otherwise, any Contribution intentionally
-submitted for inclusion in the Software by You to the Licensor shall be under the terms
-and conditions of this License, without any additional terms or conditions.
-Notwithstanding the above, nothing herein shall supersede or modify the terms of any
-separate license agreement you may have executed with Licensor regarding such
-Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
 
-Trademarks and related. Nothing in this License Agreement permits You (i) to make Use of
-Licensors’ trademarks, trade names, or logos, (ii) otherwise suggest endorsement by
-Licensor, or (iii) misrepresent the relationship between the parties; and any rights not
-expressly granted herein are reserved by the Licensors.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
 
-Output You generate. Licensor claims no rights in the Output. You agree not to contravene
-any provision as stated in the License Agreement with your Use of the Output.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
 
-Disclaimer of Warranty. Except as expressly provided otherwise herein, and to the fullest
-extent permitted by law, Licensor provides the Software (and each Contributor provides its
-Contributions) AS IS, and Licensor disclaims all warranties or guarantees of any kind,
-express or implied, whether arising under any law or from any usage in trade, or otherwise
-including but not limited to the implied warranties of merchantability, non-infringement,
-quiet enjoyment, fitness for a particular purpose, or otherwise. You are solely
-responsible for determining the appropriateness of the Software and Modifications of the
-Software for your purposes (including your use or distribution of the Software and
-Modifications of the Software), and assume any risks associated with Your exercise of
-permissions under this License Agreement.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
 
-Limitation of Liability. In no event and under no legal theory, whether in tort (including
-negligence), contract, or otherwise, unless required by applicable law (such as deliberate
-and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to
-You for damages, including any direct, indirect, special, incidental, or consequential
-damages of any character arising as a result of this License Agreement or out of the Use
-or inability to Use the Software (including but not limited to damages for loss of
-goodwill, work stoppage, computer failure or malfunction, model failure or malfunction, or
-any and all other commercial damages or losses), even if such Contributor has been advised
-of the possibility of such damages.
+   END OF TERMS AND CONDITIONS
 
-Accepting Warranty or Additional Liability. While sharing the Software or Modifications of
-the Software thereof, You may choose to offer and charge a fee for, acceptance of support,
-warranty, indemnity, or other liability obligations and/or rights consistent with this
-License Agreement. However, in accepting such obligations, You may act only on Your own
-behalf and on Your sole responsibility, not on behalf of Licensor or any other
-Contributor, and you hereby agree to indemnify, defend, and hold Licensor and each other
-Contributor (and their successors or assigns) harmless for any liability incurred by, or
-claims asserted against, such Licensor or Contributor (and their successors or assigns) by
-reason of your accepting any such warranty or additional liability.
+   APPENDIX: How to apply the Apache License to your work.
 
-Severability. This License Agreement is a license of copyright and patent rights and an
-agreement in contract between You and the Licensor. If any provision of this License
-Agreement is held to be invalid, illegal or unenforceable, the remaining provisions shall
-be unaffected thereby and remain valid as if such provision had not been set forth herein.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
 
+   Copyright 2022 Hugging Face
 
-5. DEFINITIONS
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
 
-“Contribution” refers to any work of authorship, including the original version of the
-Software and any Modifications of the Software that is intentionally submitted to Licensor
-for inclusion in the Software by the copyright owner or by an individual or entity
-authorized to submit on behalf of the copyright owner. For the purposes of this
-definition, “submitted” means any form of electronic, verbal, or written communication
-sent to the Licensor or its representatives, including but not limited to communication on
-electronic mailing lists, source code control systems, and issue tracking systems that are
-managed by, or on behalf of, the Licensor for the purpose of discussing and improving the
-Software, but excluding communication that is conspicuously marked or otherwise designated
-in writing by the copyright owner as “Not a Contribution.”
+       http://www.apache.org/licenses/LICENSE-2.0
 
-“Contributor” refers to Licensor and any individual or entity on behalf of whom a
-Contribution has been received by Licensor and subsequently incorporated within the
-Software.
-
-“Data” refers to a collection of information extracted from the dataset used with the
-Model, including to train, pretrain, or otherwise evaluate the Model. The Data is not
-licensed under this License Agreement.
-
-“Explanatory Documentation” refers to any documentation or related information including
-but not limited to model cards or data cards dedicated to inform the public about the
-characteristics of the Software. Explanatory documentation is not licensed under this
-License.
-
-"License Agreement" refers to these terms and conditions.
-
-“Licensor” refers to the rights owners or entity authorized by the rights owners that are
-granting the terms and conditions of this License Agreement.
-
-“Model” refers to machine-learning based assemblies (including checkpoints), consisting of
-learnt weights and parameters (including optimizer states), corresponding to a model
-architecture as embodied in Software source code. Source code is not licensed under this
-License Agreement.
-
-“Modifications of the Software” refers to all changes to the Software, including without
-limitation derivative works of the Software.
-
-“Output” refers to the results of operating the Software.
-
-“Share” refers to any transmission, reproduction, publication or other sharing of the
-Software or Modifications of the Software to a third party, including providing the
-Softwaire as a hosted service made available by electronic or other remote means,
-including - but not limited to - API-based or web access.
-
-“Software” refers to the software and Model (or parts of either) that Licensor makes
-available under this License Agreement.
-
-“Third Parties” refers to individuals or legal entities that are not under common control
-with Licensor or You.
-
-“Use” refers to anything You or your representatives do with the Software, including but
-not limited to generating any Output, fine tuning, updating, running, training, evaluating
-and/or reparametrizing the Model.
-
-"You" (or "Your")  refers to an individual or Legal Entity exercising permissions granted
-by this License Agreement and/or making Use of the Software for whichever purpose and in
-any field of Use.
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
index 843117d8..56f370a7 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ To use [🤗 text-generation-inference](https://github.com/huggingface/text-gene
    model=meta-llama/Llama-2-7b-hf
    volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 
-   docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model
+   docker run -p 8080:80 -v $volume:/data --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model --max-input-length 1024 --max-total-tokens 2048
    ```
    > For gated models such as [LLama](https://huggingface.co/meta-llama) or [StarCoder](https://huggingface.co/bigcode/starcoder), you will have to pass `-e HUGGING_FACE_HUB_TOKEN=<token>` to the `docker run` command above with a valid Hugging Face Hub read token.
 
@@ -53,7 +53,7 @@ To use [🤗 text-generation-inference](https://github.com/huggingface/text-gene
    model=meta-llama/Llama-2-70b-hf
    volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 
-   docker run -p 8080:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model --sharded true --num-shard 8
+   docker run -p 8080:80 -v $volume:/data --runtime=habana -e PT_HPU_ENABLE_LAZY_COLLECTIVES=true -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --ipc=host ghcr.io/huggingface/tgi-gaudi:1.2.1 --model-id $model --sharded true --num-shard 8 --max-input-length 1024 --max-total-tokens 2048
    ```
 3. You can then send a simple request:
    ```bash
diff --git a/assets/architecture.jpg b/assets/architecture.jpg
deleted file mode 100644
index c4a511c9..00000000
Binary files a/assets/architecture.jpg and /dev/null differ
diff --git a/assets/architecture.png b/assets/architecture.png
new file mode 100644
index 00000000..1bcd1283
Binary files /dev/null and b/assets/architecture.png differ
diff --git a/benchmark/Cargo.toml b/benchmark/Cargo.toml
index 2dd2e64d..40738c4d 100644
--- a/benchmark/Cargo.toml
+++ b/benchmark/Cargo.toml
@@ -29,4 +29,3 @@ tui = {package = "ratatui", version = "0.23", default-features = false, features
 tracing = "0.1.37"
 tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
 hf-hub = "0.3.1"
-
diff --git a/benchmark/README.md b/benchmark/README.md
index 7f51a731..17a02a30 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -6,12 +6,12 @@
 
 </div>
 
-A lightweight benchmarking tool based inspired by [oha](https://github.com/hatoo/oha) 
+A lightweight benchmarking tool inspired by [oha](https://github.com/hatoo/oha)
 and powered by [tui](https://github.com/tui-rs-revival/ratatui).
 
-## Install 
+## Install
 
-```shell 
+```shell
 make install-benchmark
 ```
 
@@ -27,4 +27,4 @@ Then run the benchmarking tool:
 
 ```shell
 text-generation-benchmark --tokenizer-name bigscience/bloom-560m
-```
\ No newline at end of file
+```
diff --git a/benchmark/src/app.rs b/benchmark/src/app.rs
index 49654c1b..48ac976a 100644
--- a/benchmark/src/app.rs
+++ b/benchmark/src/app.rs
@@ -444,7 +444,7 @@ fn progress_gauge(title: &str, label: String, progress: f64, color: Color) -> Ga
 }
 
 /// Throughput paragraph
-fn throughput_paragraph<'a>(throughput: &Vec<f64>, name: &'static str) -> Paragraph<'a> {
+fn throughput_paragraph<'a>(throughput: &[f64], name: &'static str) -> Paragraph<'a> {
     // Throughput average/high/low texts
     let throughput_texts = statis_spans(throughput, "tokens/secs");
 
@@ -457,7 +457,7 @@ fn throughput_paragraph<'a>(throughput: &Vec<f64>, name: &'static str) -> Paragr
 }
 
 /// Latency paragraph
-fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragraph<'a> {
+fn latency_paragraph<'a>(latency: &mut [f64], name: &'static str) -> Paragraph<'a> {
     // Latency average/high/low texts
     let mut latency_texts = statis_spans(latency, "ms");
 
@@ -466,7 +466,7 @@ fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragrap
     let latency_percentiles = crate::utils::percentiles(latency, &[50, 90, 99]);
 
     // Latency p50/p90/p99 texts
-    let colors = vec![Color::LightGreen, Color::LightYellow, Color::LightRed];
+    let colors = [Color::LightGreen, Color::LightYellow, Color::LightRed];
     for (i, (name, value)) in latency_percentiles.iter().enumerate() {
         let span = Line::from(vec![Span::styled(
             format!("{name}:     {value:.2} ms"),
@@ -483,7 +483,7 @@ fn latency_paragraph<'a>(latency: &mut Vec<f64>, name: &'static str) -> Paragrap
 }
 
 /// Average/High/Low spans
-fn statis_spans<'a>(data: &Vec<f64>, unit: &'static str) -> Vec<Line<'a>> {
+fn statis_spans<'a>(data: &[f64], unit: &'static str) -> Vec<Line<'a>> {
     vec![
         Line::from(vec![Span::styled(
             format!(
@@ -543,7 +543,7 @@ fn latency_histogram<'a>(
 
 /// Latency/Throughput chart
 fn latency_throughput_chart<'a>(
-    latency_throughput: &'a Vec<(f64, f64)>,
+    latency_throughput: &'a [(f64, f64)],
     batch_sizes: &'a [u32],
     zoom: bool,
     name: &'static str,
diff --git a/benchmark/src/generation.rs b/benchmark/src/generation.rs
index 67afa04e..ea7c9778 100644
--- a/benchmark/src/generation.rs
+++ b/benchmark/src/generation.rs
@@ -163,7 +163,7 @@ async fn prefill(
 
     // Run prefill
     let start_time = Instant::now();
-    let (_, decode_batch) = client.prefill(batch.clone()).await?;
+    let (_, decode_batch, _) = client.prefill(batch.clone()).await?;
 
     // Get latency
     let latency = start_time.elapsed();
diff --git a/benchmark/src/lib.rs b/benchmark/src/lib.rs
index 1875652c..638c6514 100644
--- a/benchmark/src/lib.rs
+++ b/benchmark/src/lib.rs
@@ -8,7 +8,7 @@ use crate::app::App;
 use crate::event::Event;
 use crossterm::ExecutableCommand;
 use std::io;
-use text_generation_client::{NextTokenChooserParameters, ShardedClient};
+use text_generation_client::{GrammarType, NextTokenChooserParameters, ShardedClient};
 use tokenizers::Tokenizer;
 use tokio::sync::{broadcast, mpsc};
 use tui::backend::CrosstermBackend;
@@ -30,6 +30,7 @@ pub async fn run(
     top_p: Option<f32>,
     typical_p: Option<f32>,
     repetition_penalty: Option<f32>,
+    frequency_penalty: Option<f32>,
     watermark: bool,
     do_sample: bool,
     client: ShardedClient,
@@ -42,7 +43,10 @@ pub async fn run(
         do_sample,
         seed: 0,
         repetition_penalty: repetition_penalty.unwrap_or(1.0),
+        frequency_penalty: frequency_penalty.unwrap_or(0.0),
         watermark,
+        grammar: String::new(),
+        grammar_type: GrammarType::None as i32,
     };
 
     // Initialize terminal properties
@@ -140,6 +144,7 @@ pub async fn run(
         top_p,
         typical_p,
         repetition_penalty,
+        frequency_penalty,
         watermark,
         do_sample,
     );
diff --git a/benchmark/src/main.rs b/benchmark/src/main.rs
index 97c8af1c..2d89e045 100644
--- a/benchmark/src/main.rs
+++ b/benchmark/src/main.rs
@@ -84,6 +84,11 @@ struct Args {
     #[clap(long, env)]
     repetition_penalty: Option<f32>,
 
+    /// Generation parameter in case you want to specifically test/debug particular
+    /// decoding strategies, for full doc refer to the `text-generation-server`
+    #[clap(long, env)]
+    frequency_penalty: Option<f32>,
+
     /// Generation parameter in case you want to specifically test/debug particular
     /// decoding strategies, for full doc refer to the `text-generation-server`
     #[clap(long, env)]
@@ -119,6 +124,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         top_p,
         typical_p,
         repetition_penalty,
+        frequency_penalty,
         watermark,
         do_sample,
         master_shard_uds_path,
@@ -187,6 +193,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
                 top_p,
                 typical_p,
                 repetition_penalty,
+                frequency_penalty,
                 watermark,
                 do_sample,
                 sharded_client,
diff --git a/benchmark/src/table.rs b/benchmark/src/table.rs
index 9e36717b..e18d7310 100644
--- a/benchmark/src/table.rs
+++ b/benchmark/src/table.rs
@@ -15,6 +15,7 @@ pub(crate) fn parameters_table(
     top_p: Option<f32>,
     typical_p: Option<f32>,
     repetition_penalty: Option<f32>,
+    frequency_penalty: Option<f32>,
     watermark: bool,
     do_sample: bool,
 ) -> Table {
@@ -33,6 +34,7 @@ pub(crate) fn parameters_table(
     builder.push_record(["Top P", &format!("{top_p:?}")]);
     builder.push_record(["Typical P", &format!("{typical_p:?}")]);
     builder.push_record(["Repetition Penalty", &format!("{repetition_penalty:?}")]);
+    builder.push_record(["Frequency Penalty", &format!("{frequency_penalty:?}")]);
     builder.push_record(["Watermark", &watermark.to_string()]);
     builder.push_record(["Do Sample", &do_sample.to_string()]);
 
@@ -149,7 +151,7 @@ fn add_throuhgputs(
     }
 }
 
-fn avg_min_max(data: &Vec<f64>) -> (f64, f64, f64) {
+fn avg_min_max(data: &[f64]) -> (f64, f64, f64) {
     let average = data.iter().sum::<f64>() / data.len() as f64;
     let min = data
         .iter()
@@ -162,7 +164,7 @@ fn avg_min_max(data: &Vec<f64>) -> (f64, f64, f64) {
     (average, *min, *max)
 }
 
-fn px(data: &Vec<f64>, p: u32) -> f64 {
+fn px(data: &[f64], p: u32) -> f64 {
     let i = (f64::from(p) / 100.0 * data.len() as f64) as usize;
     *data.get(i).unwrap_or(&std::f64::NAN)
 }
diff --git a/clients/python/.gitignore b/clients/python/.gitignore
index 5758ba92..5a8ecaa7 100644
--- a/clients/python/.gitignore
+++ b/clients/python/.gitignore
@@ -155,4 +155,4 @@ dmypy.json
 cython_debug/
 
 transformers
-safetensors
\ No newline at end of file
+safetensors
diff --git a/clients/python/Makefile b/clients/python/Makefile
index 8b4334bd..42720875 100644
--- a/clients/python/Makefile
+++ b/clients/python/Makefile
@@ -3,4 +3,4 @@ unit-tests:
 
 install:
 	pip install pip --upgrade
-	pip install -e .
\ No newline at end of file
+	pip install -e .
diff --git a/clients/python/README.md b/clients/python/README.md
index 82f3ee0c..bf37508e 100644
--- a/clients/python/README.md
+++ b/clients/python/README.md
@@ -107,7 +107,19 @@ print(text)
 ### Types
 
 ```python
-# Request Parameters
+# enum for grammar type
+class GrammarType(Enum):
+    Json = "json"
+    Regex = "regex"
+
+
+# Grammar type and value
+class Grammar:
+    # Grammar type
+    type: GrammarType
+    # Grammar value
+    value: Union[str, dict]
+
 class Parameters:
     # Activate logits sampling
     do_sample: bool
@@ -116,6 +128,10 @@ class Parameters:
     # The parameter for repetition penalty. 1.0 means no penalty.
     # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
     repetition_penalty: Optional[float]
+    # The parameter for frequency penalty. 0.0 means no penalty.
+    # Penalize new tokens based on their existing frequency in the text so far,
+    # decreasing the model's likelihood to repeat the same line verbatim.
+    frequency_penalty: Optional[float]
     # Whether to prepend the prompt to the generated text
     return_full_text: bool
     # Stop generating tokens if a member of `stop_sequences` is generated
@@ -138,10 +154,22 @@ class Parameters:
     best_of: Optional[int]
     # Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
     watermark: bool
+    # Get generation details
+    details: bool
     # Get decoder input token logprobs and ids
     decoder_input_details: bool
     # Return the N most likely tokens at each step
-    top_n_tokens: Optional[int] 
+    top_n_tokens: Optional[int]
+    # grammar to use for generation
+    grammar: Optional[Grammar]
+
+class Request:
+    # Prompt
+    inputs: str
+    # Generation parameters
+    parameters: Optional[Parameters]
+    # Whether to stream output tokens
+    stream: bool
 
 # Decoder input tokens
 class InputToken:
@@ -161,7 +189,7 @@ class Token:
     # Token text
     text: str
     # Logprob
-    logprob: float
+    logprob: Optional[float]
     # Is the token a special token
     # Can be used to ignore tokens when concatenating
     special: bool
@@ -192,7 +220,7 @@ class BestOfSequence:
     # Generated tokens
     tokens: List[Token]
     # Most likely tokens
-    top_tokens: Optional[List[List[Token]]] 
+    top_tokens: Optional[List[List[Token]]]
 
 
 # `generate` details
@@ -236,7 +264,7 @@ class StreamResponse:
     # Generated token
     token: Token
     # Most likely tokens
-    top_tokens: Optional[List[Token]] 
+    top_tokens: Optional[List[Token]]
     # Complete generated text
     # Only available when the generation is finished
     generated_text: Optional[str]
@@ -248,4 +276,4 @@ class StreamResponse:
 class DeployedModel:
     model_id: str
     sha: str
-```
\ No newline at end of file
+```
diff --git a/clients/python/poetry.lock b/clients/python/poetry.lock
index 2d4e45d2..148d9906 100644
--- a/clients/python/poetry.lock
+++ b/clients/python/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -707,18 +707,19 @@ files = [
 
 [[package]]
 name = "pydantic"
-version = "2.4.2"
+version = "2.5.3"
 description = "Data validation using Python type hints"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pydantic-2.4.2-py3-none-any.whl", hash = "sha256:bc3ddf669d234f4220e6e1c4d96b061abe0998185a8d7855c0126782b7abc8c1"},
-    {file = "pydantic-2.4.2.tar.gz", hash = "sha256:94f336138093a5d7f426aac732dcfe7ab4eb4da243c88f891d65deb4a2556ee7"},
+    {file = "pydantic-2.5.3-py3-none-any.whl", hash = "sha256:d0caf5954bee831b6bfe7e338c32b9e30c85dfe080c843680783ac2b631673b4"},
+    {file = "pydantic-2.5.3.tar.gz", hash = "sha256:b3ef57c62535b0941697cce638c08900d87fcb67e29cfa99e8a68f747f393f7a"},
 ]
 
 [package.dependencies]
 annotated-types = ">=0.4.0"
-pydantic-core = "2.10.1"
+importlib-metadata = {version = "*", markers = "python_version == \"3.7\""}
+pydantic-core = "2.14.6"
 typing-extensions = ">=4.6.1"
 
 [package.extras]
@@ -726,117 +727,116 @@ email = ["email-validator (>=2.0.0)"]
 
 [[package]]
 name = "pydantic-core"
-version = "2.10.1"
+version = "2.14.6"
 description = ""
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pydantic_core-2.10.1-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:d64728ee14e667ba27c66314b7d880b8eeb050e58ffc5fec3b7a109f8cddbd63"},
-    {file = "pydantic_core-2.10.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:48525933fea744a3e7464c19bfede85df4aba79ce90c60b94d8b6e1eddd67096"},
-    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef337945bbd76cce390d1b2496ccf9f90b1c1242a3a7bc242ca4a9fc5993427a"},
-    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1392e0638af203cee360495fd2cfdd6054711f2db5175b6e9c3c461b76f5175"},
-    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0675ba5d22de54d07bccde38997e780044dcfa9a71aac9fd7d4d7a1d2e3e65f7"},
-    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:128552af70a64660f21cb0eb4876cbdadf1a1f9d5de820fed6421fa8de07c893"},
-    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f6e6aed5818c264412ac0598b581a002a9f050cb2637a84979859e70197aa9e"},
-    {file = "pydantic_core-2.10.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ecaac27da855b8d73f92123e5f03612b04c5632fd0a476e469dfc47cd37d6b2e"},
-    {file = "pydantic_core-2.10.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b3c01c2fb081fced3bbb3da78510693dc7121bb893a1f0f5f4b48013201f362e"},
-    {file = "pydantic_core-2.10.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:92f675fefa977625105708492850bcbc1182bfc3e997f8eecb866d1927c98ae6"},
-    {file = "pydantic_core-2.10.1-cp310-none-win32.whl", hash = "sha256:420a692b547736a8d8703c39ea935ab5d8f0d2573f8f123b0a294e49a73f214b"},
-    {file = "pydantic_core-2.10.1-cp310-none-win_amd64.whl", hash = "sha256:0880e239827b4b5b3e2ce05e6b766a7414e5f5aedc4523be6b68cfbc7f61c5d0"},
-    {file = "pydantic_core-2.10.1-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:073d4a470b195d2b2245d0343569aac7e979d3a0dcce6c7d2af6d8a920ad0bea"},
-    {file = "pydantic_core-2.10.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:600d04a7b342363058b9190d4e929a8e2e715c5682a70cc37d5ded1e0dd370b4"},
-    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39215d809470f4c8d1881758575b2abfb80174a9e8daf8f33b1d4379357e417c"},
-    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eeb3d3d6b399ffe55f9a04e09e635554012f1980696d6b0aca3e6cf42a17a03b"},
-    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a7902bf75779bc12ccfc508bfb7a4c47063f748ea3de87135d433a4cca7a2f"},
-    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3625578b6010c65964d177626fde80cf60d7f2e297d56b925cb5cdeda6e9925a"},
-    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:caa48fc31fc7243e50188197b5f0c4228956f97b954f76da157aae7f67269ae8"},
-    {file = "pydantic_core-2.10.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:07ec6d7d929ae9c68f716195ce15e745b3e8fa122fc67698ac6498d802ed0fa4"},
-    {file = "pydantic_core-2.10.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e6f31a17acede6a8cd1ae2d123ce04d8cca74056c9d456075f4f6f85de055607"},
-    {file = "pydantic_core-2.10.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d8f1ebca515a03e5654f88411420fea6380fc841d1bea08effb28184e3d4899f"},
-    {file = "pydantic_core-2.10.1-cp311-none-win32.whl", hash = "sha256:6db2eb9654a85ada248afa5a6db5ff1cf0f7b16043a6b070adc4a5be68c716d6"},
-    {file = "pydantic_core-2.10.1-cp311-none-win_amd64.whl", hash = "sha256:4a5be350f922430997f240d25f8219f93b0c81e15f7b30b868b2fddfc2d05f27"},
-    {file = "pydantic_core-2.10.1-cp311-none-win_arm64.whl", hash = "sha256:5fdb39f67c779b183b0c853cd6b45f7db84b84e0571b3ef1c89cdb1dfc367325"},
-    {file = "pydantic_core-2.10.1-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:b1f22a9ab44de5f082216270552aa54259db20189e68fc12484873d926426921"},
-    {file = "pydantic_core-2.10.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8572cadbf4cfa95fb4187775b5ade2eaa93511f07947b38f4cd67cf10783b118"},
-    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db9a28c063c7c00844ae42a80203eb6d2d6bbb97070cfa00194dff40e6f545ab"},
-    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e2a35baa428181cb2270a15864ec6286822d3576f2ed0f4cd7f0c1708472aff"},
-    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05560ab976012bf40f25d5225a58bfa649bb897b87192a36c6fef1ab132540d7"},
-    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d6495008733c7521a89422d7a68efa0a0122c99a5861f06020ef5b1f51f9ba7c"},
-    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14ac492c686defc8e6133e3a2d9eaf5261b3df26b8ae97450c1647286750b901"},
-    {file = "pydantic_core-2.10.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8282bab177a9a3081fd3d0a0175a07a1e2bfb7fcbbd949519ea0980f8a07144d"},
-    {file = "pydantic_core-2.10.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:aafdb89fdeb5fe165043896817eccd6434aee124d5ee9b354f92cd574ba5e78f"},
-    {file = "pydantic_core-2.10.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f6defd966ca3b187ec6c366604e9296f585021d922e666b99c47e78738b5666c"},
-    {file = "pydantic_core-2.10.1-cp312-none-win32.whl", hash = "sha256:7c4d1894fe112b0864c1fa75dffa045720a194b227bed12f4be7f6045b25209f"},
-    {file = "pydantic_core-2.10.1-cp312-none-win_amd64.whl", hash = "sha256:5994985da903d0b8a08e4935c46ed8daf5be1cf217489e673910951dc533d430"},
-    {file = "pydantic_core-2.10.1-cp312-none-win_arm64.whl", hash = "sha256:0d8a8adef23d86d8eceed3e32e9cca8879c7481c183f84ed1a8edc7df073af94"},
-    {file = "pydantic_core-2.10.1-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:9badf8d45171d92387410b04639d73811b785b5161ecadabf056ea14d62d4ede"},
-    {file = "pydantic_core-2.10.1-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:ebedb45b9feb7258fac0a268a3f6bec0a2ea4d9558f3d6f813f02ff3a6dc6698"},
-    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfe1090245c078720d250d19cb05d67e21a9cd7c257698ef139bc41cf6c27b4f"},
-    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e357571bb0efd65fd55f18db0a2fb0ed89d0bb1d41d906b138f088933ae618bb"},
-    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b3dcd587b69bbf54fc04ca157c2323b8911033e827fffaecf0cafa5a892a0904"},
-    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c120c9ce3b163b985a3b966bb701114beb1da4b0468b9b236fc754783d85aa3"},
-    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15d6bca84ffc966cc9976b09a18cf9543ed4d4ecbd97e7086f9ce9327ea48891"},
-    {file = "pydantic_core-2.10.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5cabb9710f09d5d2e9e2748c3e3e20d991a4c5f96ed8f1132518f54ab2967221"},
-    {file = "pydantic_core-2.10.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:82f55187a5bebae7d81d35b1e9aaea5e169d44819789837cdd4720d768c55d15"},
-    {file = "pydantic_core-2.10.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1d40f55222b233e98e3921df7811c27567f0e1a4411b93d4c5c0f4ce131bc42f"},
-    {file = "pydantic_core-2.10.1-cp37-none-win32.whl", hash = "sha256:14e09ff0b8fe6e46b93d36a878f6e4a3a98ba5303c76bb8e716f4878a3bee92c"},
-    {file = "pydantic_core-2.10.1-cp37-none-win_amd64.whl", hash = "sha256:1396e81b83516b9d5c9e26a924fa69164156c148c717131f54f586485ac3c15e"},
-    {file = "pydantic_core-2.10.1-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:6835451b57c1b467b95ffb03a38bb75b52fb4dc2762bb1d9dbed8de31ea7d0fc"},
-    {file = "pydantic_core-2.10.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b00bc4619f60c853556b35f83731bd817f989cba3e97dc792bb8c97941b8053a"},
-    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fa467fd300a6f046bdb248d40cd015b21b7576c168a6bb20aa22e595c8ffcdd"},
-    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d99277877daf2efe074eae6338453a4ed54a2d93fb4678ddfe1209a0c93a2468"},
-    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fa7db7558607afeccb33c0e4bf1c9a9a835e26599e76af6fe2fcea45904083a6"},
-    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aad7bd686363d1ce4ee930ad39f14e1673248373f4a9d74d2b9554f06199fb58"},
-    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:443fed67d33aa85357464f297e3d26e570267d1af6fef1c21ca50921d2976302"},
-    {file = "pydantic_core-2.10.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:042462d8d6ba707fd3ce9649e7bf268633a41018d6a998fb5fbacb7e928a183e"},
-    {file = "pydantic_core-2.10.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ecdbde46235f3d560b18be0cb706c8e8ad1b965e5c13bbba7450c86064e96561"},
-    {file = "pydantic_core-2.10.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ed550ed05540c03f0e69e6d74ad58d026de61b9eaebebbaaf8873e585cbb18de"},
-    {file = "pydantic_core-2.10.1-cp38-none-win32.whl", hash = "sha256:8cdbbd92154db2fec4ec973d45c565e767ddc20aa6dbaf50142676484cbff8ee"},
-    {file = "pydantic_core-2.10.1-cp38-none-win_amd64.whl", hash = "sha256:9f6f3e2598604956480f6c8aa24a3384dbf6509fe995d97f6ca6103bb8c2534e"},
-    {file = "pydantic_core-2.10.1-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:655f8f4c8d6a5963c9a0687793da37b9b681d9ad06f29438a3b2326d4e6b7970"},
-    {file = "pydantic_core-2.10.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e570ffeb2170e116a5b17e83f19911020ac79d19c96f320cbfa1fa96b470185b"},
-    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64322bfa13e44c6c30c518729ef08fda6026b96d5c0be724b3c4ae4da939f875"},
-    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:485a91abe3a07c3a8d1e082ba29254eea3e2bb13cbbd4351ea4e5a21912cc9b0"},
-    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7c2b8eb9fc872e68b46eeaf835e86bccc3a58ba57d0eedc109cbb14177be531"},
-    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a5cb87bdc2e5f620693148b5f8f842d293cae46c5f15a1b1bf7ceeed324a740c"},
-    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:25bd966103890ccfa028841a8f30cebcf5875eeac8c4bde4fe221364c92f0c9a"},
-    {file = "pydantic_core-2.10.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f323306d0556351735b54acbf82904fe30a27b6a7147153cbe6e19aaaa2aa429"},
-    {file = "pydantic_core-2.10.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0c27f38dc4fbf07b358b2bc90edf35e82d1703e22ff2efa4af4ad5de1b3833e7"},
-    {file = "pydantic_core-2.10.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f1365e032a477c1430cfe0cf2856679529a2331426f8081172c4a74186f1d595"},
-    {file = "pydantic_core-2.10.1-cp39-none-win32.whl", hash = "sha256:a1c311fd06ab3b10805abb72109f01a134019739bd3286b8ae1bc2fc4e50c07a"},
-    {file = "pydantic_core-2.10.1-cp39-none-win_amd64.whl", hash = "sha256:ae8a8843b11dc0b03b57b52793e391f0122e740de3df1474814c700d2622950a"},
-    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d43002441932f9a9ea5d6f9efaa2e21458221a3a4b417a14027a1d530201ef1b"},
-    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fcb83175cc4936a5425dde3356f079ae03c0802bbdf8ff82c035f8a54b333521"},
-    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:962ed72424bf1f72334e2f1e61b68f16c0e596f024ca7ac5daf229f7c26e4208"},
-    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2cf5bb4dd67f20f3bbc1209ef572a259027c49e5ff694fa56bed62959b41e1f9"},
-    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e544246b859f17373bed915182ab841b80849ed9cf23f1f07b73b7c58baee5fb"},
-    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c0877239307b7e69d025b73774e88e86ce82f6ba6adf98f41069d5b0b78bd1bf"},
-    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:53df009d1e1ba40f696f8995683e067e3967101d4bb4ea6f667931b7d4a01357"},
-    {file = "pydantic_core-2.10.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:a1254357f7e4c82e77c348dabf2d55f1d14d19d91ff025004775e70a6ef40ada"},
-    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:524ff0ca3baea164d6d93a32c58ac79eca9f6cf713586fdc0adb66a8cdeab96a"},
-    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f0ac9fb8608dbc6eaf17956bf623c9119b4db7dbb511650910a82e261e6600f"},
-    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:320f14bd4542a04ab23747ff2c8a778bde727158b606e2661349557f0770711e"},
-    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63974d168b6233b4ed6a0046296803cb13c56637a7b8106564ab575926572a55"},
-    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:417243bf599ba1f1fef2bb8c543ceb918676954734e2dcb82bf162ae9d7bd514"},
-    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dda81e5ec82485155a19d9624cfcca9be88a405e2857354e5b089c2a982144b2"},
-    {file = "pydantic_core-2.10.1-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:14cfbb00959259e15d684505263d5a21732b31248a5dd4941f73a3be233865b9"},
-    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:631cb7415225954fdcc2a024119101946793e5923f6c4d73a5914d27eb3d3a05"},
-    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:bec7dd208a4182e99c5b6c501ce0b1f49de2802448d4056091f8e630b28e9a52"},
-    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:149b8a07712f45b332faee1a2258d8ef1fb4a36f88c0c17cb687f205c5dc6e7d"},
-    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d966c47f9dd73c2d32a809d2be529112d509321c5310ebf54076812e6ecd884"},
-    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7eb037106f5c6b3b0b864ad226b0b7ab58157124161d48e4b30c4a43fef8bc4b"},
-    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:154ea7c52e32dce13065dbb20a4a6f0cc012b4f667ac90d648d36b12007fa9f7"},
-    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e562617a45b5a9da5be4abe72b971d4f00bf8555eb29bb91ec2ef2be348cd132"},
-    {file = "pydantic_core-2.10.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f23b55eb5464468f9e0e9a9935ce3ed2a870608d5f534025cd5536bca25b1402"},
-    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:e9121b4009339b0f751955baf4543a0bfd6bc3f8188f8056b1a25a2d45099934"},
-    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:0523aeb76e03f753b58be33b26540880bac5aa54422e4462404c432230543f33"},
-    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e0e2959ef5d5b8dc9ef21e1a305a21a36e254e6a34432d00c72a92fdc5ecda5"},
-    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da01bec0a26befab4898ed83b362993c844b9a607a86add78604186297eb047e"},
-    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f2e9072d71c1f6cfc79a36d4484c82823c560e6f5599c43c1ca6b5cdbd54f881"},
-    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f36a3489d9e28fe4b67be9992a23029c3cec0babc3bd9afb39f49844a8c721c5"},
-    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f64f82cc3443149292b32387086d02a6c7fb39b8781563e0ca7b8d7d9cf72bd7"},
-    {file = "pydantic_core-2.10.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:b4a6db486ac8e99ae696e09efc8b2b9fea67b63c8f88ba7a1a16c24a057a0776"},
-    {file = "pydantic_core-2.10.1.tar.gz", hash = "sha256:0f8682dbdd2f67f8e1edddcbffcc29f60a6182b4901c367fc8c1c40d30bb0a82"},
+    {file = "pydantic_core-2.14.6-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:72f9a942d739f09cd42fffe5dc759928217649f070056f03c70df14f5770acf9"},
+    {file = "pydantic_core-2.14.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6a31d98c0d69776c2576dda4b77b8e0c69ad08e8b539c25c7d0ca0dc19a50d6c"},
+    {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5aa90562bc079c6c290f0512b21768967f9968e4cfea84ea4ff5af5d917016e4"},
+    {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:370ffecb5316ed23b667d99ce4debe53ea664b99cc37bfa2af47bc769056d534"},
+    {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f85f3843bdb1fe80e8c206fe6eed7a1caeae897e496542cee499c374a85c6e08"},
+    {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9862bf828112e19685b76ca499b379338fd4c5c269d897e218b2ae8fcb80139d"},
+    {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:036137b5ad0cb0004c75b579445a1efccd072387a36c7f217bb8efd1afbe5245"},
+    {file = "pydantic_core-2.14.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:92879bce89f91f4b2416eba4429c7b5ca22c45ef4a499c39f0c5c69257522c7c"},
+    {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0c08de15d50fa190d577e8591f0329a643eeaed696d7771760295998aca6bc66"},
+    {file = "pydantic_core-2.14.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:36099c69f6b14fc2c49d7996cbf4f87ec4f0e66d1c74aa05228583225a07b590"},
+    {file = "pydantic_core-2.14.6-cp310-none-win32.whl", hash = "sha256:7be719e4d2ae6c314f72844ba9d69e38dff342bc360379f7c8537c48e23034b7"},
+    {file = "pydantic_core-2.14.6-cp310-none-win_amd64.whl", hash = "sha256:36fa402dcdc8ea7f1b0ddcf0df4254cc6b2e08f8cd80e7010d4c4ae6e86b2a87"},
+    {file = "pydantic_core-2.14.6-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dea7fcd62915fb150cdc373212141a30037e11b761fbced340e9db3379b892d4"},
+    {file = "pydantic_core-2.14.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ffff855100bc066ff2cd3aa4a60bc9534661816b110f0243e59503ec2df38421"},
+    {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1b027c86c66b8627eb90e57aee1f526df77dc6d8b354ec498be9a757d513b92b"},
+    {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00b1087dabcee0b0ffd104f9f53d7d3eaddfaa314cdd6726143af6bc713aa27e"},
+    {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:75ec284328b60a4e91010c1acade0c30584f28a1f345bc8f72fe8b9e46ec6a96"},
+    {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e1f4744eea1501404b20b0ac059ff7e3f96a97d3e3f48ce27a139e053bb370b"},
+    {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2602177668f89b38b9f84b7b3435d0a72511ddef45dc14446811759b82235a1"},
+    {file = "pydantic_core-2.14.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8edaea3089bf908dd27da8f5d9e395c5b4dc092dbcce9b65e7156099b4b937"},
+    {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:478e9e7b360dfec451daafe286998d4a1eeaecf6d69c427b834ae771cad4b622"},
+    {file = "pydantic_core-2.14.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b6ca36c12a5120bad343eef193cc0122928c5c7466121da7c20f41160ba00ba2"},
+    {file = "pydantic_core-2.14.6-cp311-none-win32.whl", hash = "sha256:2b8719037e570639e6b665a4050add43134d80b687288ba3ade18b22bbb29dd2"},
+    {file = "pydantic_core-2.14.6-cp311-none-win_amd64.whl", hash = "sha256:78ee52ecc088c61cce32b2d30a826f929e1708f7b9247dc3b921aec367dc1b23"},
+    {file = "pydantic_core-2.14.6-cp311-none-win_arm64.whl", hash = "sha256:a19b794f8fe6569472ff77602437ec4430f9b2b9ec7a1105cfd2232f9ba355e6"},
+    {file = "pydantic_core-2.14.6-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:667aa2eac9cd0700af1ddb38b7b1ef246d8cf94c85637cbb03d7757ca4c3fdec"},
+    {file = "pydantic_core-2.14.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cdee837710ef6b56ebd20245b83799fce40b265b3b406e51e8ccc5b85b9099b7"},
+    {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c5bcf3414367e29f83fd66f7de64509a8fd2368b1edf4351e862910727d3e51"},
+    {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:26a92ae76f75d1915806b77cf459811e772d8f71fd1e4339c99750f0e7f6324f"},
+    {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a983cca5ed1dd9a35e9e42ebf9f278d344603bfcb174ff99a5815f953925140a"},
+    {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cb92f9061657287eded380d7dc455bbf115430b3aa4741bdc662d02977e7d0af"},
+    {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4ace1e220b078c8e48e82c081e35002038657e4b37d403ce940fa679e57113b"},
+    {file = "pydantic_core-2.14.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ef633add81832f4b56d3b4c9408b43d530dfca29e68fb1b797dcb861a2c734cd"},
+    {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e90d6cc4aad2cc1f5e16ed56e46cebf4877c62403a311af20459c15da76fd91"},
+    {file = "pydantic_core-2.14.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e8a5ac97ea521d7bde7621d86c30e86b798cdecd985723c4ed737a2aa9e77d0c"},
+    {file = "pydantic_core-2.14.6-cp312-none-win32.whl", hash = "sha256:f27207e8ca3e5e021e2402ba942e5b4c629718e665c81b8b306f3c8b1ddbb786"},
+    {file = "pydantic_core-2.14.6-cp312-none-win_amd64.whl", hash = "sha256:b3e5fe4538001bb82e2295b8d2a39356a84694c97cb73a566dc36328b9f83b40"},
+    {file = "pydantic_core-2.14.6-cp312-none-win_arm64.whl", hash = "sha256:64634ccf9d671c6be242a664a33c4acf12882670b09b3f163cd00a24cffbd74e"},
+    {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:24368e31be2c88bd69340fbfe741b405302993242ccb476c5c3ff48aeee1afe0"},
+    {file = "pydantic_core-2.14.6-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:e33b0834f1cf779aa839975f9d8755a7c2420510c0fa1e9fa0497de77cd35d2c"},
+    {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6af4b3f52cc65f8a0bc8b1cd9676f8c21ef3e9132f21fed250f6958bd7223bed"},
+    {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d15687d7d7f40333bd8266f3814c591c2e2cd263fa2116e314f60d82086e353a"},
+    {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:095b707bb287bfd534044166ab767bec70a9bba3175dcdc3371782175c14e43c"},
+    {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94fc0e6621e07d1e91c44e016cc0b189b48db053061cc22d6298a611de8071bb"},
+    {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ce830e480f6774608dedfd4a90c42aac4a7af0a711f1b52f807130c2e434c06"},
+    {file = "pydantic_core-2.14.6-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a306cdd2ad3a7d795d8e617a58c3a2ed0f76c8496fb7621b6cd514eb1532cae8"},
+    {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f5fa187bde8524b1e37ba894db13aadd64faa884657473b03a019f625cee9a8"},
+    {file = "pydantic_core-2.14.6-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:438027a975cc213a47c5d70672e0d29776082155cfae540c4e225716586be75e"},
+    {file = "pydantic_core-2.14.6-cp37-none-win32.whl", hash = "sha256:f96ae96a060a8072ceff4cfde89d261837b4294a4f28b84a28765470d502ccc6"},
+    {file = "pydantic_core-2.14.6-cp37-none-win_amd64.whl", hash = "sha256:e646c0e282e960345314f42f2cea5e0b5f56938c093541ea6dbf11aec2862391"},
+    {file = "pydantic_core-2.14.6-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:db453f2da3f59a348f514cfbfeb042393b68720787bbef2b4c6068ea362c8149"},
+    {file = "pydantic_core-2.14.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3860c62057acd95cc84044e758e47b18dcd8871a328ebc8ccdefd18b0d26a21b"},
+    {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36026d8f99c58d7044413e1b819a67ca0e0b8ebe0f25e775e6c3d1fabb3c38fb"},
+    {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8ed1af8692bd8d2a29d702f1a2e6065416d76897d726e45a1775b1444f5928a7"},
+    {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:314ccc4264ce7d854941231cf71b592e30d8d368a71e50197c905874feacc8a8"},
+    {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:982487f8931067a32e72d40ab6b47b1628a9c5d344be7f1a4e668fb462d2da42"},
+    {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2dbe357bc4ddda078f79d2a36fc1dd0494a7f2fad83a0a684465b6f24b46fe80"},
+    {file = "pydantic_core-2.14.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2f6ffc6701a0eb28648c845f4945a194dc7ab3c651f535b81793251e1185ac3d"},
+    {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5025db12fc6de7bc1104d826d5aee1d172f9ba6ca936bf6474c2148ac336c1"},
+    {file = "pydantic_core-2.14.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dab03ed811ed1c71d700ed08bde8431cf429bbe59e423394f0f4055f1ca0ea60"},
+    {file = "pydantic_core-2.14.6-cp38-none-win32.whl", hash = "sha256:dfcbebdb3c4b6f739a91769aea5ed615023f3c88cb70df812849aef634c25fbe"},
+    {file = "pydantic_core-2.14.6-cp38-none-win_amd64.whl", hash = "sha256:99b14dbea2fdb563d8b5a57c9badfcd72083f6006caf8e126b491519c7d64ca8"},
+    {file = "pydantic_core-2.14.6-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:4ce8299b481bcb68e5c82002b96e411796b844d72b3e92a3fbedfe8e19813eab"},
+    {file = "pydantic_core-2.14.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b9a9d92f10772d2a181b5ca339dee066ab7d1c9a34ae2421b2a52556e719756f"},
+    {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd9e98b408384989ea4ab60206b8e100d8687da18b5c813c11e92fd8212a98e0"},
+    {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4f86f1f318e56f5cbb282fe61eb84767aee743ebe32c7c0834690ebea50c0a6b"},
+    {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86ce5fcfc3accf3a07a729779d0b86c5d0309a4764c897d86c11089be61da160"},
+    {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dcf1978be02153c6a31692d4fbcc2a3f1db9da36039ead23173bc256ee3b91b"},
+    {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eedf97be7bc3dbc8addcef4142f4b4164066df0c6f36397ae4aaed3eb187d8ab"},
+    {file = "pydantic_core-2.14.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5f916acf8afbcab6bacbb376ba7dc61f845367901ecd5e328fc4d4aef2fcab0"},
+    {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8a14c192c1d724c3acbfb3f10a958c55a2638391319ce8078cb36c02283959b9"},
+    {file = "pydantic_core-2.14.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0348b1dc6b76041516e8a854ff95b21c55f5a411c3297d2ca52f5528e49d8411"},
+    {file = "pydantic_core-2.14.6-cp39-none-win32.whl", hash = "sha256:de2a0645a923ba57c5527497daf8ec5df69c6eadf869e9cd46e86349146e5975"},
+    {file = "pydantic_core-2.14.6-cp39-none-win_amd64.whl", hash = "sha256:aca48506a9c20f68ee61c87f2008f81f8ee99f8d7f0104bff3c47e2d148f89d9"},
+    {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:d5c28525c19f5bb1e09511669bb57353d22b94cf8b65f3a8d141c389a55dec95"},
+    {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:78d0768ee59baa3de0f4adac9e3748b4b1fffc52143caebddfd5ea2961595277"},
+    {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b93785eadaef932e4fe9c6e12ba67beb1b3f1e5495631419c784ab87e975670"},
+    {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a874f21f87c485310944b2b2734cd6d318765bcbb7515eead33af9641816506e"},
+    {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89f4477d915ea43b4ceea6756f63f0288941b6443a2b28c69004fe07fde0d0d"},
+    {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:172de779e2a153d36ee690dbc49c6db568d7b33b18dc56b69a7514aecbcf380d"},
+    {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:dfcebb950aa7e667ec226a442722134539e77c575f6cfaa423f24371bb8d2e94"},
+    {file = "pydantic_core-2.14.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:55a23dcd98c858c0db44fc5c04fc7ed81c4b4d33c653a7c45ddaebf6563a2f66"},
+    {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4241204e4b36ab5ae466ecec5c4c16527a054c69f99bba20f6f75232a6a534e2"},
+    {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e574de99d735b3fc8364cba9912c2bec2da78775eba95cbb225ef7dda6acea24"},
+    {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1302a54f87b5cd8528e4d6d1bf2133b6aa7c6122ff8e9dc5220fbc1e07bffebd"},
+    {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f8e81e4b55930e5ffab4a68db1af431629cf2e4066dbdbfef65348b8ab804ea8"},
+    {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:c99462ffc538717b3e60151dfaf91125f637e801f5ab008f81c402f1dff0cd0f"},
+    {file = "pydantic_core-2.14.6-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e4cf2d5829f6963a5483ec01578ee76d329eb5caf330ecd05b3edd697e7d768a"},
+    {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:cf10b7d58ae4a1f07fccbf4a0a956d705356fea05fb4c70608bb6fa81d103cda"},
+    {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:399ac0891c284fa8eb998bcfa323f2234858f5d2efca3950ae58c8f88830f145"},
+    {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9c6a5c79b28003543db3ba67d1df336f253a87d3112dac3a51b94f7d48e4c0e1"},
+    {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:599c87d79cab2a6a2a9df4aefe0455e61e7d2aeede2f8577c1b7c0aec643ee8e"},
+    {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e166ad47ba900f2542a80d83f9fc65fe99eb63ceec4debec160ae729824052"},
+    {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:3a0b5db001b98e1c649dd55afa928e75aa4087e587b9524a4992316fa23c9fba"},
+    {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:747265448cb57a9f37572a488a57d873fd96bf51e5bb7edb52cfb37124516da4"},
+    {file = "pydantic_core-2.14.6-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:7ebe3416785f65c28f4f9441e916bfc8a54179c8dea73c23023f7086fa601c5d"},
+    {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:86c963186ca5e50d5c8287b1d1c9d3f8f024cbe343d048c5bd282aec2d8641f2"},
+    {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:e0641b506486f0b4cd1500a2a65740243e8670a2549bb02bc4556a83af84ae03"},
+    {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71d72ca5eaaa8d38c8df16b7deb1a2da4f650c41b58bb142f3fb75d5ad4a611f"},
+    {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27e524624eace5c59af499cd97dc18bb201dc6a7a2da24bfc66ef151c69a5f2a"},
+    {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3dde6cac75e0b0902778978d3b1646ca9f438654395a362cb21d9ad34b24acf"},
+    {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:00646784f6cd993b1e1c0e7b0fdcbccc375d539db95555477771c27555e3c556"},
+    {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:23598acb8ccaa3d1d875ef3b35cb6376535095e9405d91a3d57a8c7db5d29341"},
+    {file = "pydantic_core-2.14.6-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7f41533d7e3cf9520065f610b41ac1c76bc2161415955fbcead4981b22c7611e"},
+    {file = "pydantic_core-2.14.6.tar.gz", hash = "sha256:1fd0c1d395372843fba13a51c28e3bb9d59bd7aebfeb17358ffaaa1e4dbbe948"},
 ]
 
 [package.dependencies]
@@ -928,6 +928,7 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
     {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
     {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
     {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
index 4fe6e8b0..2925085b 100644
--- a/clients/python/pyproject.toml
+++ b/clients/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation"
-version = "0.6.1"
+version = "0.7.0"
 description = "Hugging Face Text Generation Python Client"
 license = "Apache-2.0"
 authors = ["Olivier Dehaene <olivier@huggingface.co>"]
@@ -12,7 +12,7 @@ repository = "https://github.com/huggingface/text-generation-inference"
 
 [tool.poetry.dependencies]
 python = "^3.7"
-pydantic = "> 1.10, < 3"
+pydantic = "> 2, < 3"
 aiohttp = "^3.8"
 huggingface-hub = ">= 0.12, < 1.0"
 
diff --git a/clients/python/text_generation/client.py b/clients/python/text_generation/client.py
index 0bf80f8c..95d23901 100644
--- a/clients/python/text_generation/client.py
+++ b/clients/python/text_generation/client.py
@@ -3,13 +3,19 @@ import requests
 
 from aiohttp import ClientSession, ClientTimeout
 from pydantic import ValidationError
-from typing import Dict, Optional, List, AsyncIterator, Iterator
+from typing import Dict, Optional, List, AsyncIterator, Iterator, Union
 
 from text_generation.types import (
     StreamResponse,
     Response,
     Request,
     Parameters,
+    Grammar,
+    ChatRequest,
+    ChatCompletionChunk,
+    ChatComplete,
+    Message,
+    Tool,
 )
 from text_generation.errors import parse_error
 
@@ -58,6 +64,120 @@ class Client:
         self.cookies = cookies
         self.timeout = timeout
 
+    def chat(
+        self,
+        messages: List[Message],
+        repetition_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[List[float]] = None,
+        logprobs: Optional[bool] = None,
+        top_logprobs: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        n: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        stream: bool = False,
+        seed: Optional[int] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        tools: Optional[List[Tool]] = None,
+        tool_choice: Optional[str] = None,
+    ) -> Union[ChatComplete, Iterator[ChatCompletionChunk]]:
+        """
+        Given a list of messages, generate a response (or a stream of chunks when `stream` is set)
+
+        Args:
+            messages (`List[Message]`):
+                List of messages
+            repetition_penalty (`float`):
+                The parameter for repetition penalty. 0.0 means no penalty. See [this
+                paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            frequency_penalty (`float`):
+                The parameter for frequency penalty. 0.0 means no penalty
+                Penalize new tokens based on their existing frequency in the text so far,
+                decreasing the model's likelihood to repeat the same line verbatim.
+            logit_bias (`List[float]`):
+                Adjust the likelihood of specified tokens
+            logprobs (`bool`):
+                Include log probabilities in the response
+            top_logprobs (`int`):
+                Include the `n` most likely tokens at each step
+            max_tokens (`int`):
+                Maximum number of generated tokens
+            n (`int`):
+                Generate `n` completions
+            presence_penalty (`float`):
+                The parameter for presence penalty. 0.0 means no penalty. See [this
+                paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            stream (`bool`):
+                Stream the response
+            seed (`int`):
+                Random sampling seed
+            temperature (`float`):
+                The value used to modulate the logits distribution.
+            top_p (`float`):
+                If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
+                higher are kept for generation
+            tools (`List[Tool]`):
+                List of tools to use
+            tool_choice (`str`):
+                The tool to use
+
+        """
+        request = ChatRequest(
+            model="tgi",
+            messages=messages,
+            repetition_penalty=repetition_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+            n=n,
+            presence_penalty=presence_penalty,
+            stream=stream,
+            seed=seed,
+            temperature=temperature,
+            top_p=top_p,
+            tools=tools,
+            tool_choice=tool_choice,
+        )
+        if not stream:
+            resp = requests.post(
+                f"{self.base_url}/v1/chat/completions",
+                json=request.dict(),
+                headers=self.headers,
+                cookies=self.cookies,
+                timeout=self.timeout,
+            )
+            payload = resp.json()
+            if resp.status_code != 200:
+                raise parse_error(resp.status_code, payload)
+            return ChatComplete(**payload)
+        else:
+            return self._chat_stream_response(request)
+
+    def _chat_stream_response(self, request):
+        """Yield a `ChatCompletionChunk` for each `data:` line streamed back."""
+        resp = requests.post(
+            f"{self.base_url}/v1/chat/completions",
+            json=request.dict(),
+            headers=self.headers,
+            cookies=self.cookies,
+            timeout=self.timeout,
+            stream=True,
+        )
+        for byte_payload in resp.iter_lines():
+            if byte_payload == b"\n":
+                continue
+            payload = byte_payload.decode("utf-8")
+            if payload.startswith("data:"):
+                # Slice the prefix off: lstrip("data:") strips a character set
+                json_payload = json.loads(payload[len("data:") :].rstrip("\n"))
+                try:
+                    yield ChatCompletionChunk(**json_payload)
+                except ValidationError:
+                    raise parse_error(resp.status_code, json_payload)
+
     def generate(
         self,
         prompt: str,
@@ -65,6 +185,7 @@ class Client:
         max_new_tokens: int = 20,
         best_of: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
         return_full_text: bool = False,
         seed: Optional[int] = None,
         stop_sequences: Optional[List[str]] = None,
@@ -76,6 +197,7 @@ class Client:
         watermark: bool = False,
         decoder_input_details: bool = False,
         top_n_tokens: Optional[int] = None,
+        grammar: Optional[Grammar] = None,
     ) -> Response:
         """
         Given a prompt, generate the following text
@@ -92,6 +214,10 @@ class Client:
             repetition_penalty (`float`):
                 The parameter for repetition penalty. 1.0 means no penalty. See [this
                 paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            frequency_penalty (`float`):
+                The parameter for frequency penalty. 1.0 means no penalty
+                Penalize new tokens based on their existing frequency in the text so far,
+                decreasing the model's likelihood to repeat the same line verbatim.
             return_full_text (`bool`):
                 Whether to prepend the prompt to the generated text
             seed (`int`):
@@ -116,6 +242,9 @@ class Client:
                 Return the decoder input token logprobs and ids
             top_n_tokens (`int`):
                 Return the `n` most likely tokens at each step
+            grammar (`Grammar`):
+                Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation
+                of the text to match a regular expression or JSON schema.
 
         Returns:
             Response: generated response
@@ -127,6 +256,7 @@ class Client:
             do_sample=do_sample,
             max_new_tokens=max_new_tokens,
             repetition_penalty=repetition_penalty,
+            frequency_penalty=frequency_penalty,
             return_full_text=return_full_text,
             seed=seed,
             stop=stop_sequences if stop_sequences is not None else [],
@@ -138,6 +268,7 @@ class Client:
             watermark=watermark,
             decoder_input_details=decoder_input_details,
             top_n_tokens=top_n_tokens,
+            grammar=grammar,
         )
         request = Request(inputs=prompt, stream=False, parameters=parameters)
 
@@ -159,6 +290,7 @@ class Client:
         do_sample: bool = False,
         max_new_tokens: int = 20,
         repetition_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
         return_full_text: bool = False,
         seed: Optional[int] = None,
         stop_sequences: Optional[List[str]] = None,
@@ -169,6 +301,7 @@ class Client:
         typical_p: Optional[float] = None,
         watermark: bool = False,
         top_n_tokens: Optional[int] = None,
+        grammar: Optional[Grammar] = None,
     ) -> Iterator[StreamResponse]:
         """
         Given a prompt, generate the following stream of tokens
@@ -183,6 +316,10 @@ class Client:
             repetition_penalty (`float`):
                 The parameter for repetition penalty. 1.0 means no penalty. See [this
                 paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            frequency_penalty (`float`):
+                The parameter for frequency penalty. 1.0 means no penalty
+                Penalize new tokens based on their existing frequency in the text so far,
+                decreasing the model's likelihood to repeat the same line verbatim.
             return_full_text (`bool`):
                 Whether to prepend the prompt to the generated text
             seed (`int`):
@@ -205,6 +342,9 @@ class Client:
                 Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
             top_n_tokens (`int`):
                 Return the `n` most likely tokens at each step
+            grammar (`Grammar`):
+                Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation
+                of the text to match a regular expression or JSON schema.
 
         Returns:
             Iterator[StreamResponse]: stream of generated tokens
@@ -217,6 +357,7 @@ class Client:
             do_sample=do_sample,
             max_new_tokens=max_new_tokens,
             repetition_penalty=repetition_penalty,
+            frequency_penalty=frequency_penalty,
             return_full_text=return_full_text,
             seed=seed,
             stop=stop_sequences if stop_sequences is not None else [],
@@ -227,6 +368,7 @@ class Client:
             typical_p=typical_p,
             watermark=watermark,
             top_n_tokens=top_n_tokens,
+            grammar=grammar,
         )
         request = Request(inputs=prompt, stream=True, parameters=parameters)
 
@@ -306,7 +448,120 @@ class AsyncClient:
         self.base_url = base_url
         self.headers = headers
         self.cookies = cookies
-        self.timeout = ClientTimeout(timeout * 60)
+        self.timeout = ClientTimeout(timeout)
+
+    async def chat(
+        self,
+        messages: List[Message],
+        repetition_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[List[float]] = None,
+        logprobs: Optional[bool] = None,
+        top_logprobs: Optional[int] = None,
+        max_tokens: Optional[int] = None,
+        n: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        stream: bool = False,
+        seed: Optional[int] = None,
+        temperature: Optional[float] = None,
+        top_p: Optional[float] = None,
+        tools: Optional[List[Tool]] = None,
+        tool_choice: Optional[str] = None,
+    ) -> Union[ChatComplete, AsyncIterator[ChatCompletionChunk]]:
+        """
+        Given a list of messages, generate a response asynchronously
+
+        Args:
+            messages (`List[Message]`):
+                List of messages
+            repetition_penalty (`float`):
+                The parameter for repetition penalty. 0.0 means no penalty. See [this
+                paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            frequency_penalty (`float`):
+                The parameter for frequency penalty. 0.0 means no penalty
+                Penalize new tokens based on their existing frequency in the text so far,
+                decreasing the model's likelihood to repeat the same line verbatim.
+            logit_bias (`List[float]`):
+                Adjust the likelihood of specified tokens
+            logprobs (`bool`):
+                Include log probabilities in the response
+            top_logprobs (`int`):
+                Include the `n` most likely tokens at each step
+            max_tokens (`int`):
+                Maximum number of generated tokens
+            n (`int`):
+                Generate `n` completions
+            presence_penalty (`float`):
+                The parameter for presence penalty. 0.0 means no penalty. See [this
+                paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            stream (`bool`):
+                Stream the response
+            seed (`int`):
+                Random sampling seed
+            temperature (`float`):
+                The value used to modulate the logits distribution.
+            top_p (`float`):
+                If set to < 1, only the smallest set of most probable tokens with probabilities that add up to `top_p` or
+                higher are kept for generation
+            tools (`List[Tool]`):
+                List of tools to use
+            tool_choice (`str`):
+                The tool to use
+
+        """
+        request = ChatRequest(
+            model="tgi",
+            messages=messages,
+            repetition_penalty=repetition_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
+            logprobs=logprobs,
+            top_logprobs=top_logprobs,
+            max_tokens=max_tokens,
+            n=n,
+            presence_penalty=presence_penalty,
+            stream=stream,
+            seed=seed,
+            temperature=temperature,
+            top_p=top_p,
+            tools=tools,
+            tool_choice=tool_choice,
+        )
+        if not stream:
+            return await self._chat_single_response(request)
+        else:
+            return self._chat_stream_response(request)
+
+    async def _chat_single_response(self, request):
+        """POST `request` to the chat endpoint and parse the reply as a `ChatComplete`."""
+        endpoint = f"{self.base_url}/v1/chat/completions"
+        async with ClientSession(
+            headers=self.headers, cookies=self.cookies, timeout=self.timeout
+        ) as session:
+            async with session.post(endpoint, json=request.dict()) as resp:
+                payload = await resp.json()
+                if resp.status != 200:
+                    raise parse_error(resp.status, payload)
+                return ChatComplete(**payload)
+
+    async def _chat_stream_response(self, request):
+        """Yield a `ChatCompletionChunk` for each `data:` line streamed back."""
+        endpoint = f"{self.base_url}/v1/chat/completions"
+        async with ClientSession(
+            headers=self.headers, cookies=self.cookies, timeout=self.timeout
+        ) as session:
+            async with session.post(endpoint, json=request.dict()) as resp:
+                async for byte_payload in resp.content:
+                    if byte_payload == b"\n":
+                        continue
+                    payload = byte_payload.decode("utf-8")
+                    if payload.startswith("data:"):
+                        # Slice the prefix: lstrip("data:") strips a char set
+                        json_payload = json.loads(payload[len("data:") :].rstrip("\n"))
+                        try:
+                            yield ChatCompletionChunk(**json_payload)
+                        except ValidationError:
+                            raise parse_error(resp.status, json_payload)
 
     async def generate(
         self,
@@ -315,6 +570,7 @@ class AsyncClient:
         max_new_tokens: int = 20,
         best_of: Optional[int] = None,
         repetition_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
         return_full_text: bool = False,
         seed: Optional[int] = None,
         stop_sequences: Optional[List[str]] = None,
@@ -326,6 +582,7 @@ class AsyncClient:
         watermark: bool = False,
         decoder_input_details: bool = False,
         top_n_tokens: Optional[int] = None,
+        grammar: Optional[Grammar] = None,
     ) -> Response:
         """
         Given a prompt, generate the following text asynchronously
@@ -342,6 +599,10 @@ class AsyncClient:
             repetition_penalty (`float`):
                 The parameter for repetition penalty. 1.0 means no penalty. See [this
                 paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            frequency_penalty (`float`):
+                The parameter for frequency penalty. 1.0 means no penalty
+                Penalize new tokens based on their existing frequency in the text so far,
+                decreasing the model's likelihood to repeat the same line verbatim.
             return_full_text (`bool`):
                 Whether to prepend the prompt to the generated text
             seed (`int`):
@@ -366,10 +627,14 @@ class AsyncClient:
                 Return the decoder input token logprobs and ids
             top_n_tokens (`int`):
                 Return the `n` most likely tokens at each step
+            grammar (`Grammar`):
+                Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation
+                of the text to match a regular expression or JSON schema.
 
         Returns:
             Response: generated response
         """
+
         # Validate parameters
         parameters = Parameters(
             best_of=best_of,
@@ -378,6 +643,7 @@ class AsyncClient:
             do_sample=do_sample,
             max_new_tokens=max_new_tokens,
             repetition_penalty=repetition_penalty,
+            frequency_penalty=frequency_penalty,
             return_full_text=return_full_text,
             seed=seed,
             stop=stop_sequences if stop_sequences is not None else [],
@@ -388,6 +654,7 @@ class AsyncClient:
             typical_p=typical_p,
             watermark=watermark,
             top_n_tokens=top_n_tokens,
+            grammar=grammar,
         )
         request = Request(inputs=prompt, stream=False, parameters=parameters)
 
@@ -407,6 +674,7 @@ class AsyncClient:
         do_sample: bool = False,
         max_new_tokens: int = 20,
         repetition_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
         return_full_text: bool = False,
         seed: Optional[int] = None,
         stop_sequences: Optional[List[str]] = None,
@@ -417,6 +685,7 @@ class AsyncClient:
         typical_p: Optional[float] = None,
         watermark: bool = False,
         top_n_tokens: Optional[int] = None,
+        grammar: Optional[Grammar] = None,
     ) -> AsyncIterator[StreamResponse]:
         """
         Given a prompt, generate the following stream of tokens asynchronously
@@ -431,6 +700,10 @@ class AsyncClient:
             repetition_penalty (`float`):
                 The parameter for repetition penalty. 1.0 means no penalty. See [this
                 paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+            frequency_penalty (`float`):
+                The parameter for frequency penalty. 1.0 means no penalty
+                Penalize new tokens based on their existing frequency in the text so far,
+                decreasing the model's likelihood to repeat the same line verbatim.
             return_full_text (`bool`):
                 Whether to prepend the prompt to the generated text
             seed (`int`):
@@ -453,6 +726,9 @@ class AsyncClient:
                 Watermarking with [A Watermark for Large Language Models](https://arxiv.org/abs/2301.10226)
             top_n_tokens (`int`):
                 Return the `n` most likely tokens at each step
+            grammar (`Grammar`):
+                Whether to use a grammar for the generation and the grammar to use. Grammars will constrain the generation
+                of the text to match a regular expression or JSON schema.
 
         Returns:
             AsyncIterator[StreamResponse]: stream of generated tokens
@@ -465,6 +741,7 @@ class AsyncClient:
             do_sample=do_sample,
             max_new_tokens=max_new_tokens,
             repetition_penalty=repetition_penalty,
+            frequency_penalty=frequency_penalty,
             return_full_text=return_full_text,
             seed=seed,
             stop=stop_sequences if stop_sequences is not None else [],
@@ -475,6 +752,7 @@ class AsyncClient:
             typical_p=typical_p,
             watermark=watermark,
             top_n_tokens=top_n_tokens,
+            grammar=grammar,
         )
         request = Request(inputs=prompt, stream=True, parameters=parameters)
 
diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py
index aa02d8d8..deb987c5 100644
--- a/clients/python/text_generation/types.py
+++ b/clients/python/text_generation/types.py
@@ -1,10 +1,147 @@
 from enum import Enum
-from pydantic import BaseModel, validator
-from typing import Optional, List
+from pydantic import BaseModel, field_validator
+from typing import Optional, List, Union, Any
 
 from text_generation.errors import ValidationError
 
 
+# Enum for grammar type
+class GrammarType(str, Enum):
+    Json = "json"
+    Regex = "regex"
+
+
+# Grammar type and value
+class Grammar(BaseModel):
+    # Grammar type
+    type: GrammarType
+    # Grammar value
+    value: Union[str, dict]
+
+
+class ToolCall(BaseModel):
+    # Id of the tool call
+    id: int
+    # Type of the tool call
+    type: str
+    # Function details of the tool call
+    function: dict
+
+
+class Message(BaseModel):
+    # Role of the message sender
+    role: str
+    # Content of the message
+    content: Optional[str] = None
+    # Optional name of the message sender
+    name: Optional[str] = None
+    # Tool calls associated with the chat completion
+    tool_calls: Optional[Any] = None
+
+
+class Tool(BaseModel):
+    # Type of the tool
+    type: str
+    # Function details of the tool
+    function: dict
+
+
+class ChatCompletionComplete(BaseModel):
+    # Index of the chat completion
+    index: int
+    # Message associated with the chat completion
+    message: Message
+    # Log probabilities for the chat completion
+    logprobs: Optional[Any]
+    # Reason for completion
+    finish_reason: str
+    # Usage details of the chat completion
+    usage: Optional[Any] = None
+
+
+class Function(BaseModel):
+    name: Optional[str]
+    arguments: str
+
+
+class ChoiceDeltaToolCall(BaseModel):
+    index: int
+    id: str
+    type: str
+    function: Function
+
+
+class ChoiceDelta(BaseModel):
+    role: str
+    content: Optional[str] = None
+    tool_calls: Optional[ChoiceDeltaToolCall]
+
+
+class Choice(BaseModel):
+    index: int
+    delta: ChoiceDelta
+    logprobs: Optional[dict] = None
+    finish_reason: Optional[str] = None
+
+
+class ChatCompletionChunk(BaseModel):
+    id: str
+    object: str
+    created: int
+    model: str
+    system_fingerprint: str
+    choices: List[Choice]
+
+
+class ChatComplete(BaseModel):
+    # Chat completion details
+    id: str
+    object: str
+    created: int
+    model: str
+    system_fingerprint: str
+    choices: List[ChatCompletionComplete]
+    usage: Any
+
+
+class ChatRequest(BaseModel):
+    # Model identifier
+    model: str
+    # List of messages in the conversation
+    messages: List[Message]
+    # The parameter for repetition penalty. 1.0 means no penalty.
+    # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
+    repetition_penalty: Optional[float] = None
+    # The parameter for frequency penalty. 1.0 means no penalty.
+    # Penalize new tokens based on their existing frequency in the text so far,
+    # decreasing the model's likelihood to repeat the same line verbatim.
+    frequency_penalty: Optional[float] = None
+    # Bias values for token selection
+    logit_bias: Optional[List[float]] = None
+    # Whether to return log probabilities
+    logprobs: Optional[bool] = None
+    # Number of most likely tokens to return at each position
+    top_logprobs: Optional[int] = None
+    # Maximum number of tokens to generate
+    max_tokens: Optional[int] = None
+    # Number of chat completion choices to generate
+    n: Optional[int] = None
+    # Penalty for presence of new tokens
+    presence_penalty: Optional[float] = None
+    # Flag to indicate streaming response
+    stream: bool = False
+    # Random sampling seed
+    seed: Optional[int] = None
+    # Sampling temperature
+    temperature: Optional[float] = None
+    # Top-p value for nucleus sampling
+    top_p: Optional[float] = None
+    # List of tools to be used
+    tools: Optional[List[Tool]] = None
+    # Choice of tool to be used
+    tool_choice: Optional[str] = None
+
+
 class Parameters(BaseModel):
     # Activate logits sampling
     do_sample: bool = False
@@ -13,6 +150,10 @@ class Parameters(BaseModel):
     # The parameter for repetition penalty. 1.0 means no penalty.
     # See [this paper](https://arxiv.org/pdf/1909.05858.pdf) for more details.
     repetition_penalty: Optional[float] = None
+    # The parameter for frequency penalty. 1.0 means no penalty.
+    # Penalize new tokens based on their existing frequency in the text so far,
+    # decreasing the model's likelihood to repeat the same line verbatim.
+    frequency_penalty: Optional[float] = None
     # Whether to prepend the prompt to the generated text
     return_full_text: bool = False
     # Stop generating tokens if a member of `stop_sequences` is generated
@@ -41,74 +182,91 @@ class Parameters(BaseModel):
     decoder_input_details: bool = False
     # Return the N most likely tokens at each step
     top_n_tokens: Optional[int] = None
+    # Grammar to use for generation
+    grammar: Optional[Grammar] = None
 
-    @validator("best_of")
+    @field_validator("best_of")
     def valid_best_of(cls, field_value, values):
         if field_value is not None:
             if field_value <= 0:
                 raise ValidationError("`best_of` must be strictly positive")
-            if field_value > 1 and values["seed"] is not None:
+            if field_value > 1 and values.data["seed"] is not None:
                 raise ValidationError("`seed` must not be set when `best_of` is > 1")
             sampling = (
-                values["do_sample"]
-                | (values["temperature"] is not None)
-                | (values["top_k"] is not None)
-                | (values["top_p"] is not None)
-                | (values["typical_p"] is not None)
+                values.data["do_sample"]
+                | (values.data["temperature"] is not None)
+                | (values.data["top_k"] is not None)
+                | (values.data["top_p"] is not None)
+                | (values.data["typical_p"] is not None)
             )
             if field_value > 1 and not sampling:
                 raise ValidationError("you must use sampling when `best_of` is > 1")
 
         return field_value
 
-    @validator("repetition_penalty")
+    @field_validator("repetition_penalty")
     def valid_repetition_penalty(cls, v):
         if v is not None and v <= 0:
             raise ValidationError("`repetition_penalty` must be strictly positive")
         return v
 
-    @validator("seed")
+    @field_validator("frequency_penalty")
+    def valid_frequency_penalty(cls, v):
+        if v is not None and v <= 0:
+            raise ValidationError("`frequency_penalty` must be strictly positive")
+        return v
+
+    @field_validator("seed")
     def valid_seed(cls, v):
         if v is not None and v < 0:
             raise ValidationError("`seed` must be positive")
         return v
 
-    @validator("temperature")
+    @field_validator("temperature")
     def valid_temp(cls, v):
         if v is not None and v <= 0:
             raise ValidationError("`temperature` must be strictly positive")
         return v
 
-    @validator("top_k")
+    @field_validator("top_k")
     def valid_top_k(cls, v):
         if v is not None and v <= 0:
             raise ValidationError("`top_k` must be strictly positive")
         return v
 
-    @validator("top_p")
+    @field_validator("top_p")
     def valid_top_p(cls, v):
         if v is not None and (v <= 0 or v >= 1.0):
             raise ValidationError("`top_p` must be > 0.0 and < 1.0")
         return v
 
-    @validator("truncate")
+    @field_validator("truncate")
     def valid_truncate(cls, v):
         if v is not None and v <= 0:
             raise ValidationError("`truncate` must be strictly positive")
         return v
 
-    @validator("typical_p")
+    @field_validator("typical_p")
     def valid_typical_p(cls, v):
         if v is not None and (v <= 0 or v >= 1.0):
             raise ValidationError("`typical_p` must be > 0.0 and < 1.0")
         return v
 
-    @validator("top_n_tokens")
+    @field_validator("top_n_tokens")
     def valid_top_n_tokens(cls, v):
         if v is not None and v <= 0:
             raise ValidationError("`top_n_tokens` must be strictly positive")
         return v
 
+    @field_validator("grammar")
+    def valid_grammar(cls, v):
+        if v is not None:
+            if v.type == GrammarType.Regex and not v.value:
+                raise ValidationError("`value` cannot be empty for `regex` grammar")
+            if v.type == GrammarType.Json and not v.value:
+                raise ValidationError("`value` cannot be empty for `json` grammar")
+        return v
+
 
 class Request(BaseModel):
     # Prompt
@@ -118,15 +276,15 @@ class Request(BaseModel):
     # Whether to stream output tokens
     stream: bool = False
 
-    @validator("inputs")
+    @field_validator("inputs")
     def valid_input(cls, v):
         if not v:
             raise ValidationError("`inputs` cannot be empty")
         return v
 
-    @validator("stream")
+    @field_validator("stream")
     def valid_best_of_stream(cls, field_value, values):
-        parameters = values["parameters"]
+        parameters = values.data["parameters"]
         if (
             parameters is not None
             and parameters.best_of is not None
@@ -157,7 +315,7 @@ class Token(BaseModel):
     # Token text
     text: str
     # Logprob
-    logprob: float
+    logprob: Optional[float] = None
     # Is the token a special token
     # Can be used to ignore tokens when concatenating
     special: bool
diff --git a/docs/index.html b/docs/index.html
index 16d143d8..f582d3ce 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -27,4 +27,4 @@
             }
         </script>
     </body>
-</html>
\ No newline at end of file
+</html>
diff --git a/docs/openapi.json b/docs/openapi.json
index 6a15ae66..34b030f2 100644
--- a/docs/openapi.json
+++ b/docs/openapi.json
@@ -10,7 +10,7 @@
       "name": "Apache 2.0",
       "url": "https://www.apache.org/licenses/LICENSE-2.0"
     },
-    "version": "1.2.0"
+    "version": "2.0.0"
   },
   "paths": {
     "/": {
@@ -342,6 +342,219 @@
           }
         }
       }
+    },
+    "/tokenize": {
+      "post": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Tokenize inputs",
+        "description": "Tokenize inputs",
+        "operationId": "tokenize",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/GenerateRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Tokenized ids",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/TokenizeResponse"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "No tokenizer found",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "No fast tokenizer available"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/v1/chat/completions": {
+      "post": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Generate tokens",
+        "description": "Generate tokens",
+        "operationId": "chat_completions",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/ChatRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Generated Text",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ChatCompletionChunk"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Input validation error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Input validation error"
+                }
+              }
+            }
+          },
+          "424": {
+            "description": "Generation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Request failed during generation"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Model is overloaded",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Model is overloaded"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Incomplete generation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Incomplete generation"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/v1/completions": {
+      "post": {
+        "tags": [
+          "Text Generation Inference"
+        ],
+        "summary": "Generate tokens",
+        "description": "Generate tokens",
+        "operationId": "completions",
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/CompletionRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Generated Text",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ChatCompletionChunk"
+                }
+              }
+            }
+          },
+          "422": {
+            "description": "Input validation error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Input validation error"
+                }
+              }
+            }
+          },
+          "424": {
+            "description": "Generation Error",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Request failed during generation"
+                }
+              }
+            }
+          },
+          "429": {
+            "description": "Model is overloaded",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Model is overloaded"
+                }
+              }
+            }
+          },
+          "500": {
+            "description": "Incomplete generation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ErrorResponse"
+                },
+                "example": {
+                  "error": "Incomplete generation"
+                }
+              }
+            }
+          }
+        }
+      }
     }
   },
   "components": {
@@ -399,6 +612,353 @@
           }
         }
       },
+      "ChatCompletion": {
+        "type": "object",
+        "required": [
+          "id",
+          "object",
+          "created",
+          "model",
+          "system_fingerprint",
+          "choices",
+          "usage"
+        ],
+        "properties": {
+          "choices": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionComplete"
+            }
+          },
+          "created": {
+            "type": "integer",
+            "format": "int64",
+            "example": "1706270835",
+            "minimum": 0
+          },
+          "id": {
+            "type": "string"
+          },
+          "model": {
+            "type": "string",
+            "example": "mistralai/Mistral-7B-Instruct-v0.2"
+          },
+          "object": {
+            "type": "string"
+          },
+          "system_fingerprint": {
+            "type": "string"
+          },
+          "usage": {
+            "$ref": "#/components/schemas/Usage"
+          }
+        }
+      },
+      "ChatCompletionChoice": {
+        "type": "object",
+        "required": [
+          "index",
+          "delta"
+        ],
+        "properties": {
+          "delta": {
+            "$ref": "#/components/schemas/ChatCompletionDelta"
+          },
+          "finish_reason": {
+            "type": "string",
+            "nullable": true
+          },
+          "index": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          },
+          "logprobs": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionLogprobs"
+              }
+            ],
+            "nullable": true
+          }
+        }
+      },
+      "ChatCompletionChunk": {
+        "type": "object",
+        "required": [
+          "id",
+          "object",
+          "created",
+          "model",
+          "system_fingerprint",
+          "choices"
+        ],
+        "properties": {
+          "choices": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionChoice"
+            }
+          },
+          "created": {
+            "type": "integer",
+            "format": "int64",
+            "example": "1706270978",
+            "minimum": 0
+          },
+          "id": {
+            "type": "string"
+          },
+          "model": {
+            "type": "string",
+            "example": "mistralai/Mistral-7B-Instruct-v0.2"
+          },
+          "object": {
+            "type": "string"
+          },
+          "system_fingerprint": {
+            "type": "string"
+          }
+        }
+      },
+      "ChatCompletionComplete": {
+        "type": "object",
+        "required": [
+          "index",
+          "message",
+          "finish_reason"
+        ],
+        "properties": {
+          "finish_reason": {
+            "type": "string"
+          },
+          "index": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          },
+          "logprobs": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionLogprobs"
+              }
+            ],
+            "nullable": true
+          },
+          "message": {
+            "$ref": "#/components/schemas/Message"
+          }
+        }
+      },
+      "ChatCompletionDelta": {
+        "type": "object",
+        "required": [
+          "role"
+        ],
+        "properties": {
+          "content": {
+            "type": "string",
+            "example": "What is Deep Learning?",
+            "nullable": true
+          },
+          "role": {
+            "type": "string",
+            "example": "user"
+          },
+          "tool_calls": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/DeltaToolCall"
+              }
+            ],
+            "nullable": true
+          }
+        }
+      },
+      "ChatCompletionLogprob": {
+        "type": "object",
+        "required": [
+          "token",
+          "logprob",
+          "top_logprobs"
+        ],
+        "properties": {
+          "logprob": {
+            "type": "number",
+            "format": "float"
+          },
+          "token": {
+            "type": "string"
+          },
+          "top_logprobs": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionTopLogprob"
+            }
+          }
+        }
+      },
+      "ChatCompletionLogprobs": {
+        "type": "object",
+        "required": [
+          "content"
+        ],
+        "properties": {
+          "content": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionLogprob"
+            }
+          }
+        }
+      },
+      "ChatCompletionTopLogprob": {
+        "type": "object",
+        "required": [
+          "token",
+          "logprob"
+        ],
+        "properties": {
+          "logprob": {
+            "type": "number",
+            "format": "float"
+          },
+          "token": {
+            "type": "string"
+          }
+        }
+      },
+      "ChatRequest": {
+        "type": "object",
+        "required": [
+          "model",
+          "messages"
+        ],
+        "properties": {
+          "frequency_penalty": {
+            "type": "number",
+            "format": "float",
+            "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
+            "example": "1.0",
+            "nullable": true
+          },
+          "logit_bias": {
+            "type": "array",
+            "items": {
+              "type": "number",
+              "format": "float"
+            },
+            "description": "UNUSED\nModify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens\n(specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,\nthe bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,\nbut values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should\nresult in a ban or exclusive selection of the relevant token.",
+            "nullable": true
+          },
+          "logprobs": {
+            "type": "boolean",
+            "description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each\noutput token returned in the content of message.",
+            "example": "false",
+            "nullable": true
+          },
+          "max_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "description": "The maximum number of tokens that can be generated in the chat completion.",
+            "example": "32",
+            "nullable": true,
+            "minimum": 0
+          },
+          "messages": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Message"
+            },
+            "description": "A list of messages comprising the conversation so far.",
+            "example": "[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]"
+          },
+          "model": {
+            "type": "string",
+            "description": "[UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
+            "example": "mistralai/Mistral-7B-Instruct-v0.2"
+          },
+          "n": {
+            "type": "integer",
+            "format": "int32",
+            "description": "UNUSED\nHow many chat completion choices to generate for each input message. Note that you will be charged based on the\nnumber of generated tokens across all of the choices. Keep n as 1 to minimize costs.",
+            "example": "2",
+            "nullable": true,
+            "minimum": 0
+          },
+          "presence_penalty": {
+            "type": "number",
+            "format": "float",
+            "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,\nincreasing the model's likelihood to talk about new topics",
+            "example": 0.1,
+            "nullable": true
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64",
+            "example": 42,
+            "nullable": true,
+            "minimum": 0
+          },
+          "stop": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "Up to 4 sequences where the API will stop generating further tokens.",
+            "example": "null",
+            "nullable": true
+          },
+          "stream": {
+            "type": "boolean"
+          },
+          "temperature": {
+            "type": "number",
+            "format": "float",
+            "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both.",
+            "example": 1.0,
+            "nullable": true
+          },
+          "tool_choice": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ToolType"
+              }
+            ],
+            "nullable": true
+          },
+          "tool_prompt": {
+            "type": "string",
+            "description": "A prompt to be appended before the tools",
+            "example": "\"Based on the conversation, please choose the most appropriate tool to use: \"",
+            "nullable": true
+          },
+          "tools": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Tool"
+            },
+            "description": "A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of\nfunctions the model may generate JSON inputs for.",
+            "example": "null",
+            "nullable": true
+          },
+          "top_logprobs": {
+            "type": "integer",
+            "format": "int32",
+            "description": "An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with\nan associated log probability. logprobs must be set to true if this parameter is used.",
+            "example": "5",
+            "nullable": true,
+            "minimum": 0
+          },
+          "top_p": {
+            "type": "number",
+            "format": "float",
+            "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
+            "example": 0.95,
+            "nullable": true
+          }
+        }
+      },
       "CompatGenerateRequest": {
         "type": "object",
         "required": [
@@ -418,6 +978,164 @@
           }
         }
       },
+      "CompletionComplete": {
+        "type": "object",
+        "required": [
+          "index",
+          "text",
+          "finish_reason"
+        ],
+        "properties": {
+          "finish_reason": {
+            "type": "string"
+          },
+          "index": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          },
+          "logprobs": {
+            "type": "array",
+            "items": {
+              "type": "number",
+              "format": "float"
+            },
+            "nullable": true
+          },
+          "text": {
+            "type": "string"
+          }
+        }
+      },
+      "CompletionCompleteChunk": {
+        "type": "object",
+        "required": [
+          "id",
+          "object",
+          "created",
+          "choices",
+          "model",
+          "system_fingerprint"
+        ],
+        "properties": {
+          "choices": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/CompletionComplete"
+            }
+          },
+          "created": {
+            "type": "integer",
+            "format": "int64",
+            "minimum": 0
+          },
+          "id": {
+            "type": "string"
+          },
+          "model": {
+            "type": "string"
+          },
+          "object": {
+            "type": "string"
+          },
+          "system_fingerprint": {
+            "type": "string"
+          }
+        }
+      },
+      "CompletionRequest": {
+        "type": "object",
+        "required": [
+          "model",
+          "prompt"
+        ],
+        "properties": {
+          "frequency_penalty": {
+            "type": "number",
+            "format": "float",
+            "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,\ndecreasing the model's likelihood to repeat the same line verbatim.",
+            "example": "1.0",
+            "nullable": true
+          },
+          "max_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "description": "The maximum number of tokens that can be generated in the chat completion.",
+            "default": "32",
+            "nullable": true,
+            "minimum": 0
+          },
+          "model": {
+            "type": "string",
+            "description": "UNUSED\nID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.",
+            "example": "mistralai/Mistral-7B-Instruct-v0.2"
+          },
+          "prompt": {
+            "type": "string",
+            "description": "The prompt to generate completions for.",
+            "example": "What is Deep Learning?"
+          },
+          "repetition_penalty": {
+            "type": "number",
+            "format": "float",
+            "nullable": true
+          },
+          "seed": {
+            "type": "integer",
+            "format": "int64",
+            "example": 42,
+            "nullable": true,
+            "minimum": 0
+          },
+          "stream": {
+            "type": "boolean"
+          },
+          "suffix": {
+            "type": "string",
+            "description": "The text to append to the prompt. This is useful for completing sentences or generating a paragraph of text.\nplease see the completion_template field in the model's tokenizer_config.json file for completion template.",
+            "nullable": true
+          },
+          "temperature": {
+            "type": "number",
+            "format": "float",
+            "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while\nlower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.",
+            "example": 1.0,
+            "nullable": true
+          },
+          "top_p": {
+            "type": "number",
+            "format": "float",
+            "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the\ntokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.",
+            "example": 0.95,
+            "nullable": true
+          }
+        }
+      },
+      "DeltaToolCall": {
+        "type": "object",
+        "required": [
+          "index",
+          "id",
+          "type",
+          "function"
+        ],
+        "properties": {
+          "function": {
+            "$ref": "#/components/schemas/Function"
+          },
+          "id": {
+            "type": "string"
+          },
+          "index": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          },
+          "type": {
+            "type": "string"
+          }
+        }
+      },
       "Details": {
         "type": "object",
         "required": [
@@ -494,7 +1212,40 @@
           "length",
           "eos_token",
           "stop_sequence"
-        ]
+        ],
+        "example": "Length"
+      },
+      "Function": {
+        "type": "object",
+        "required": [
+          "arguments"
+        ],
+        "properties": {
+          "arguments": {
+            "type": "string"
+          },
+          "name": {
+            "type": "string",
+            "nullable": true
+          }
+        }
+      },
+      "FunctionDefinition": {
+        "type": "object",
+        "required": [
+          "name",
+          "parameters"
+        ],
+        "properties": {
+          "description": {
+            "type": "string",
+            "nullable": true
+          },
+          "name": {
+            "type": "string"
+          },
+          "parameters": {}
+        }
       },
       "GenerateParameters": {
         "type": "object",
@@ -520,10 +1271,26 @@
             "default": "false",
             "example": true
           },
+          "frequency_penalty": {
+            "type": "number",
+            "format": "float",
+            "default": "null",
+            "example": 0.1,
+            "nullable": true,
+            "exclusiveMinimum": -2
+          },
+          "grammar": {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/GrammarType"
+              }
+            ],
+            "nullable": true
+          },
           "max_new_tokens": {
             "type": "integer",
             "format": "int32",
-            "default": "null",
+            "default": "100",
             "example": "20",
             "nullable": true,
             "minimum": 0
@@ -653,6 +1420,49 @@
           }
         }
       },
+      "GrammarType": {
+        "oneOf": [
+          {
+            "type": "object",
+            "required": [
+              "type",
+              "value"
+            ],
+            "properties": {
+              "type": {
+                "type": "string",
+                "enum": [
+                  "json"
+                ]
+              },
+              "value": {
+                "description": "A string that represents a [JSON Schema](https://json-schema.org/).\n\nJSON Schema is a declarative language that allows to annotate JSON documents\nwith types and descriptions."
+              }
+            }
+          },
+          {
+            "type": "object",
+            "required": [
+              "type",
+              "value"
+            ],
+            "properties": {
+              "type": {
+                "type": "string",
+                "enum": [
+                  "regex"
+                ]
+              },
+              "value": {
+                "type": "string"
+              }
+            }
+          }
+        ],
+        "discriminator": {
+          "propertyName": "type"
+        }
+      },
       "Info": {
         "type": "object",
         "required": [
@@ -676,6 +1486,12 @@
             "example": "null",
             "nullable": true
           },
+          "max_batch_size": {
+            "type": "integer",
+            "example": "null",
+            "nullable": true,
+            "minimum": 0
+          },
           "max_batch_total_tokens": {
             "type": "integer",
             "format": "int32",
@@ -758,6 +1574,35 @@
           }
         }
       },
+      "Message": {
+        "type": "object",
+        "required": [
+          "role"
+        ],
+        "properties": {
+          "content": {
+            "type": "string",
+            "example": "My name is David and I",
+            "nullable": true
+          },
+          "name": {
+            "type": "string",
+            "example": "\"David\"",
+            "nullable": true
+          },
+          "role": {
+            "type": "string",
+            "example": "user"
+          },
+          "tool_calls": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ToolCall"
+            },
+            "nullable": true
+          }
+        }
+      },
       "PrefillToken": {
         "type": "object",
         "required": [
@@ -784,6 +1629,37 @@
           }
         }
       },
+      "SimpleToken": {
+        "type": "object",
+        "required": [
+          "id",
+          "text",
+          "start",
+          "stop"
+        ],
+        "properties": {
+          "id": {
+            "type": "integer",
+            "format": "int32",
+            "example": 0,
+            "minimum": 0
+          },
+          "start": {
+            "type": "integer",
+            "example": 0,
+            "minimum": 0
+          },
+          "stop": {
+            "type": "integer",
+            "example": 2,
+            "minimum": 0
+          },
+          "text": {
+            "type": "string",
+            "example": "test"
+          }
+        }
+      },
       "StreamDetails": {
         "type": "object",
         "required": [
@@ -812,6 +1688,7 @@
       "StreamResponse": {
         "type": "object",
         "required": [
+          "index",
           "token"
         ],
         "properties": {
@@ -830,6 +1707,11 @@
             "example": "test",
             "nullable": true
           },
+          "index": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          },
           "token": {
             "$ref": "#/components/schemas/Token"
           },
@@ -871,6 +1753,95 @@
             "example": "test"
           }
         }
+      },
+      "TokenizeResponse": {
+        "type": "array",
+        "items": {
+          "$ref": "#/components/schemas/SimpleToken"
+        }
+      },
+      "Tool": {
+        "type": "object",
+        "required": [
+          "type",
+          "function"
+        ],
+        "properties": {
+          "function": {
+            "$ref": "#/components/schemas/FunctionDefinition"
+          },
+          "type": {
+            "type": "string",
+            "example": "function"
+          }
+        }
+      },
+      "ToolCall": {
+        "type": "object",
+        "required": [
+          "id",
+          "type",
+          "function"
+        ],
+        "properties": {
+          "function": {
+            "$ref": "#/components/schemas/FunctionDefinition"
+          },
+          "id": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          },
+          "type": {
+            "type": "string"
+          }
+        }
+      },
+      "ToolType": {
+        "oneOf": [
+          {
+            "type": "object",
+            "required": [
+              "FunctionName"
+            ],
+            "properties": {
+              "FunctionName": {
+                "type": "string"
+              }
+            }
+          },
+          {
+            "type": "string",
+            "enum": [
+              "OneOf"
+            ]
+          }
+        ]
+      },
+      "Usage": {
+        "type": "object",
+        "required": [
+          "prompt_tokens",
+          "completion_tokens",
+          "total_tokens"
+        ],
+        "properties": {
+          "completion_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          },
+          "prompt_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          },
+          "total_tokens": {
+            "type": "integer",
+            "format": "int32",
+            "minimum": 0
+          }
+        }
       }
     }
   },
diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 6fa50a6a..1598c248 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -7,6 +7,8 @@
     title: Installation
   - local: supported_models
     title: Supported Models and Hardware
+  - local: messages_api
+    title: Messages API
   title: Getting started
 - sections:
   - local: basic_tutorials/consuming_tgi
@@ -21,6 +23,8 @@
     title: All TGI CLI  options
   - local: basic_tutorials/non_core_models
     title: Non-core Model Serving
+  - local: basic_tutorials/safety
+    title: Safety
   title: Tutorials
 - sections:
   - local: conceptual/streaming
@@ -35,4 +39,8 @@
     title: Safetensors
   - local: conceptual/flash_attention
     title: Flash Attention
+  - local: conceptual/speculation
+    title: Speculation (Medusa, ngram)
+  - local: conceptual/guidance
+    title: Guidance, JSON, tools (using outlines)
   title: Conceptual Guides
diff --git a/docs/source/basic_tutorials/consuming_tgi.md b/docs/source/basic_tutorials/consuming_tgi.md
index 540f4b13..4829ec7c 100644
--- a/docs/source/basic_tutorials/consuming_tgi.md
+++ b/docs/source/basic_tutorials/consuming_tgi.md
@@ -23,7 +23,7 @@ You can simply install `huggingface-hub` package with pip.
 pip install huggingface-hub
 ```
 
-Once you start the TGI server, instantiate `InferenceClient()` with the URL to the endpoint serving the model. You can then call `text_generation()` to hit the endpoint through Python. 
+Once you start the TGI server, instantiate `InferenceClient()` with the URL to the endpoint serving the model. You can then call `text_generation()` to hit the endpoint through Python.
 
 ```python
 from huggingface_hub import InferenceClient
@@ -83,8 +83,8 @@ Gradio is a Python library that helps you build web applications for your machin
 pip install huggingface-hub gradio
 ```
 
-Assume you are serving your model on port 8080, we will query through [InferenceClient](consuming_tgi#inference-client). 
- 
+Assume you are serving your model on port 8080, we will query through [InferenceClient](consuming_tgi#inference-client).
+
 ```python
 import gradio as gr
 from huggingface_hub import InferenceClient
@@ -110,30 +110,30 @@ gr.ChatInterface(
 ).queue().launch()
 ```
 
-The UI looks like this 👇 
+The UI looks like this 👇
 
 <div class="flex justify-center">
-    <img 
-        class="block dark:hidden" 
+    <img
+        class="block dark:hidden"
         src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/gradio-tgi.png"
     />
-    <img 
-        class="hidden dark:block" 
+    <img
+        class="hidden dark:block"
         src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/gradio-tgi-dark.png"
     />
 </div>
 
-You can try the demo directly here 👇 
+You can try the demo directly here 👇
 
 <div class="block dark:hidden">
-	<iframe 
+	<iframe
         src="https://merve-gradio-tgi-2.hf.space?__theme=light"
         width="850"
         height="750"
     ></iframe>
 </div>
 <div class="hidden dark:block">
-    <iframe 
+    <iframe
         src="https://merve-gradio-tgi-2.hf.space?__theme=dark"
         width="850"
         height="750"
@@ -152,4 +152,4 @@ You can read more about how to customize a `ChatInterface` [here](https://www.gr
 
 ## API documentation
 
-You can consult the OpenAPI documentation of the `text-generation-inference` REST API using the `/docs` route. The Swagger UI is also available [here](https://huggingface.github.io/text-generation-inference). 
+You can consult the OpenAPI documentation of the `text-generation-inference` REST API using the `/docs` route. The Swagger UI is also available [here](https://huggingface.github.io/text-generation-inference).
diff --git a/docs/source/basic_tutorials/gated_model_access.md b/docs/source/basic_tutorials/gated_model_access.md
index e1abccac..060d177d 100644
--- a/docs/source/basic_tutorials/gated_model_access.md
+++ b/docs/source/basic_tutorials/gated_model_access.md
@@ -19,6 +19,6 @@ docker run --gpus all \
     --shm-size 1g \
     -e HUGGING_FACE_HUB_TOKEN=$token \
     -p 8080:80 \
-    -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 \
+    -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4 \
     --model-id $model
 ```
diff --git a/docs/source/basic_tutorials/launcher.md b/docs/source/basic_tutorials/launcher.md
index 62abe8c6..d9b272db 100644
--- a/docs/source/basic_tutorials/launcher.md
+++ b/docs/source/basic_tutorials/launcher.md
@@ -60,12 +60,21 @@ Options:
           [env: QUANTIZE=]
 
           Possible values:
-          - awq:              4 bit quantization. Requires a specific GTPQ quantized model: https://hf.co/models?search=awq. Should replace GPTQ models whereever possible because of the better latency
-          - eetq:             8 bit quantization, doesn't require specific model. Should be a drop-in replacement to bitsandbytes with much better performance. Kernels are from https://github.com/NetEase-FuXi/EETQ.git
-          - gptq:             4 bit quantization. Requires a specific GTPQ quantized model: https://hf.co/models?search=gptq. text-generation-inference will use exllama (faster) kernels whereever possible, and use triton kernel (wider support) when it's not. AWQ has faster kernels
+          - awq:              4 bit quantization. Requires a specific AWQ quantized model: <https://hf.co/models?search=awq>. Should replace GPTQ models wherever possible because of the better latency
+          - eetq:             8 bit quantization, doesn't require specific model. Should be a drop-in replacement to bitsandbytes with much better performance. Kernels are from <https://github.com/NetEase-FuXi/EETQ.git>
+          - gptq:             4 bit quantization. Requires a specific GPTQ quantized model: <https://hf.co/models?search=gptq>. text-generation-inference will use exllama (faster) kernels wherever possible, and use triton kernel (wider support) when it's not. AWQ has faster kernels
           - bitsandbytes:     Bitsandbytes 8bit. Can be applied on any model, will cut the memory requirement in half, but it is known that the model will be much slower to run than the native f16
           - bitsandbytes-nf4: Bitsandbytes 4bit. Can be applied on any model, will cut the memory requirement by 4x, but it is known that the model will be much slower to run than the native f16
           - bitsandbytes-fp4: Bitsandbytes 4bit. nf4 should be preferred in most cases but maybe this one has better perplexity performance for you model
+          - fp8:              [FP8](https://developer.nvidia.com/blog/nvidia-arm-and-intel-publish-fp8-specification-for-standardization-as-an-interchange-format-for-ai/) (e4m3) works on H100 and above. This dtype has native ops and should be the fastest if available. This is currently not the fastest because of local unpacking + padding to satisfy matrix multiplication limitations
+
+```
+## SPECULATE
+```shell
+      --speculate <SPECULATE>
+          The number of input_ids to speculate on. If using a medusa model, the heads will be picked up automatically. Otherwise, it will use n-gram speculation, which is relatively free in terms of compute, but the speedup heavily depends on the task
+          
+          [env: SPECULATE=]
 
 ```
 ## DTYPE
@@ -120,23 +129,29 @@ Options:
           [env: MAX_TOP_N_TOKENS=]
           [default: 5]
 
+```
+## MAX_INPUT_TOKENS
+```shell
+      --max-input-tokens <MAX_INPUT_TOKENS>
+          This is the maximum allowed input length (expressed in number of tokens) for users. The larger this value, the longer prompt users can send which can impact the overall memory required to handle the load. Please note that some models have a finite range of sequence they can handle. Default to min(max_position_embeddings - 1, 4095)
+          
+          [env: MAX_INPUT_TOKENS=]
+
 ```
 ## MAX_INPUT_LENGTH
 ```shell
       --max-input-length <MAX_INPUT_LENGTH>
-          This is the maximum allowed input length (expressed in number of tokens) for users. The larger this value, the longer prompt users can send which can impact the overall memory required to handle the load. Please note that some models have a finite range of sequence they can handle
+          Legacy version of [`Args::max_input_tokens`]
           
           [env: MAX_INPUT_LENGTH=]
-          [default: 1024]
 
 ```
 ## MAX_TOTAL_TOKENS
 ```shell
       --max-total-tokens <MAX_TOTAL_TOKENS>
-          This is the most important value to set as it defines the "memory budget" of running clients requests. Clients will send input sequences and ask to generate `max_new_tokens` on top. with a value of `1512` users can send either a prompt of `1000` and ask for `512` new tokens, or send a prompt of `1` and ask for `1511` max_new_tokens. The larger this value, the larger amount each request will be in your RAM and the less effective batching can be
+          This is the most important value to set as it defines the "memory budget" of running clients requests. Clients will send input sequences and ask to generate `max_new_tokens` on top. with a value of `1512` users can send either a prompt of `1000` and ask for `512` new tokens, or send a prompt of `1` and ask for `1511` max_new_tokens. The larger this value, the larger amount each request will be in your RAM and the less effective batching can be. Default to min(max_position_embeddings, 4096)
           
           [env: MAX_TOTAL_TOKENS=]
-          [default: 2048]
 
 ```
 ## WAITING_SERVED_RATIO
@@ -153,10 +168,9 @@ Options:
 ## MAX_BATCH_PREFILL_TOKENS
 ```shell
       --max-batch-prefill-tokens <MAX_BATCH_PREFILL_TOKENS>
-          Limits the number of tokens for the prefill operation. Since this operation take the most memory and is compute bound, it is interesting to limit the number of requests that can be sent
+          Limits the number of tokens for the prefill operation. Since this operation takes the most memory and is compute bound, it is interesting to limit the number of requests that can be sent. Default to `max_input_tokens + 50` to give a bit of room
           
           [env: MAX_BATCH_PREFILL_TOKENS=]
-          [default: 4096]
 
 ```
 ## MAX_BATCH_TOTAL_TOKENS
@@ -189,6 +203,22 @@ Options:
           [env: MAX_WAITING_TOKENS=]
           [default: 20]
 
+```
+## MAX_BATCH_SIZE
+```shell
+      --max-batch-size <MAX_BATCH_SIZE>
+          Enforce a maximum number of requests per batch. Specific flag for hardware targets that do not support unpadded inference
+          
+          [env: MAX_BATCH_SIZE=]
+
+```
+## CUDA_GRAPHS
+```shell
+      --cuda-graphs <CUDA_GRAPHS>
+          Specify the batch sizes to compute cuda graphs for. Use "0" to disable. Default = "1,2,4,8,16,32"
+          
+          [env: CUDA_GRAPHS=]
+
 ```
 ## HOSTNAME
 ```shell
@@ -346,6 +376,22 @@ Options:
           
           [env: NGROK_EDGE=]
 
+```
+## TOKENIZER_CONFIG_PATH
+```shell
+      --tokenizer-config-path <TOKENIZER_CONFIG_PATH>
+          The path to the tokenizer config file. This path is used to load the tokenizer configuration which may include a `chat_template`. If not provided, the default config will be used from the model hub
+          
+          [env: TOKENIZER_CONFIG_PATH=]
+
+```
+## DISABLE_GRAMMAR_SUPPORT
+```shell
+      --disable-grammar-support
+          Disable outlines grammar constrained generation. This is a feature that allows you to generate text that follows a specific grammar
+          
+          [env: DISABLE_GRAMMAR_SUPPORT=]
+
 ```
 ## ENV
 ```shell
diff --git a/docs/source/basic_tutorials/non_core_models.md b/docs/source/basic_tutorials/non_core_models.md
index 6f2e6cfa..2badaff0 100644
--- a/docs/source/basic_tutorials/non_core_models.md
+++ b/docs/source/basic_tutorials/non_core_models.md
@@ -2,19 +2,19 @@
 
 TGI supports various LLM architectures (see full list [here](../supported_models)). If you wish to serve a model that is not one of the supported models, TGI will fallback to the `transformers` implementation of that model. This means you will be unable to use some of the features introduced by TGI, such as tensor-parallel sharding or flash attention. However, you can still get many benefits of TGI, such as continuous batching or streaming outputs.
 
-You can serve these models using the same Docker command-line invocation as with fully supported models 👇 
+You can serve these models using the same Docker command-line invocation as with fully supported models 👇
 
 ```bash
 docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id gpt2
 ```
 
-If the model you wish to serve is a custom transformers model, and its weights and implementation are available in the Hub, you can still serve the model by passing the `--trust-remote-code` flag to the `docker run` command like below 👇 
+If the model you wish to serve is a custom transformers model, and its weights and implementation are available in the Hub, you can still serve the model by passing the `--trust-remote-code` flag to the `docker run` command like below 👇
 
 ```bash
 docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id <CUSTOM_MODEL_ID> --trust-remote-code
 ```
 
-Finally, if the model is not on Hugging Face Hub but on your local, you can pass the path to the folder that contains your model like below 👇 
+Finally, if the model is not on Hugging Face Hub but on your local, you can pass the path to the folder that contains your model like below 👇
 
 ```bash
 # Make sure your model is in the $volume directory
diff --git a/docs/source/basic_tutorials/preparing_model.md b/docs/source/basic_tutorials/preparing_model.md
index ea74d18c..71ca5598 100644
--- a/docs/source/basic_tutorials/preparing_model.md
+++ b/docs/source/basic_tutorials/preparing_model.md
@@ -1,6 +1,6 @@
 # Preparing the Model
 
-Text Generation Inference improves the model in several aspects. 
+Text Generation Inference improves the model in several aspects.
 
 ## Quantization
 
@@ -9,7 +9,7 @@ TGI supports [bits-and-bytes](https://github.com/TimDettmers/bitsandbytes#bitsan
 
 ## RoPE Scaling
 
-RoPE scaling can be used to increase the sequence length of the model during the inference time without necessarily fine-tuning it. To enable RoPE scaling, simply pass `--rope-scaling`, `--max-input-length` and `--rope-factors` flags when running through CLI. `--rope-scaling` can take the values `linear` or `dynamic`. If your model is not fine-tuned to a longer sequence length, use `dynamic`. `--rope-factor` is the ratio between the intended max sequence length and the model's original max sequence length. Make sure to pass `--max-input-length` to provide maximum input length for extension. 
+RoPE scaling can be used to increase the sequence length of the model during the inference time without necessarily fine-tuning it. To enable RoPE scaling, simply pass `--rope-scaling`, `--max-input-length` and `--rope-factor` flags when running through CLI. `--rope-scaling` can take the values `linear` or `dynamic`. If your model is not fine-tuned to a longer sequence length, use `dynamic`. `--rope-factor` is the ratio between the intended max sequence length and the model's original max sequence length. Make sure to pass `--max-input-length` to provide maximum input length for extension.
 
 <Tip>
 
@@ -19,4 +19,4 @@ We recommend using `dynamic` RoPE scaling.
 
 ## Safetensors
 
-[Safetensors](https://github.com/huggingface/safetensors) is a fast and safe persistence format for deep learning models, and is required for tensor parallelism. TGI supports `safetensors` model loading under the hood. By default, given a repository with `safetensors` and `pytorch` weights, TGI will always load `safetensors`. If there's no `pytorch` weights, TGI will convert the weights to `safetensors` format. 
+[Safetensors](https://github.com/huggingface/safetensors) is a fast and safe persistence format for deep learning models, and is required for tensor parallelism. TGI supports `safetensors` model loading under the hood. By default, given a repository with `safetensors` and `pytorch` weights, TGI will always load `safetensors`. If there's no `pytorch` weights, TGI will convert the weights to `safetensors` format.
diff --git a/docs/source/basic_tutorials/safety.md b/docs/source/basic_tutorials/safety.md
new file mode 100644
index 00000000..0b865db4
--- /dev/null
+++ b/docs/source/basic_tutorials/safety.md
@@ -0,0 +1,31 @@
+# Model safety.
+
+[PyTorch uses pickle](https://pytorch.org/docs/master/generated/torch.load.html) by default, meaning that for quite a long while
+*every* model using that format is potentially executing unintended code while simply loading the model.
+
+There is a big red warning on Python's page for pickle [link](https://docs.python.org/3/library/pickle.html) but for quite a while
+this was ignored by the community. Now that AI/ML is getting used much more ubiquitously we need to switch away from this format.
+
+HuggingFace is leading the effort here by creating a new format which contains pure data ([safetensors](https://github.com/huggingface/safetensors))
+and moving slowly but surely all the libs to make use of it by default.
+The move is intentionally slow so that breaking changes have as little impact as possible on users throughout.
+
+
+# TGI 2.0
+
+Since the release of TGI 2.0, we take the opportunity of this major version increase to break backward compatibility for these pytorch
+models (since they are a huge security risk for anyone deploying them).
+
+
+From now on, TGI will not automatically convert pickle files unless the `--trust-remote-code` flag is passed or `TRUST_REMOTE_CODE=true` is set in the environment variables.
+This flag is already used for community defined inference code, and is therefore quite representative of the level of confidence you are giving the model providers.
+
+
+If you want to use a model that uses pickle, but you still do not want to trust the authors entirely, we recommend running a conversion on our space made for that.
+
+https://huggingface.co/spaces/safetensors/convert
+
+This space will create a PR on the original model, which you can use directly regardless of merge status from the original authors. Just use
+```
+docker run .... --revision refs/pr/#ID # Or use REVISION=refs/pr/#ID in the environment
+```
diff --git a/docs/source/basic_tutorials/using_cli.md b/docs/source/basic_tutorials/using_cli.md
index 82c10e6b..64554069 100644
--- a/docs/source/basic_tutorials/using_cli.md
+++ b/docs/source/basic_tutorials/using_cli.md
@@ -1,30 +1,30 @@
 # Using TGI CLI
 
-You can use TGI command-line interface (CLI) to download weights, serve and quantize models, or get information on serving parameters. To install the CLI, please refer to [the installation section](./installation#install-cli).
+You can use TGI command-line interface (CLI) to download weights, serve and quantize models, or get information on serving parameters. To install the CLI, please refer to [the installation section](../installation#install-cli).
 
-`text-generation-server` lets you download the model with `download-weights` command like below 👇 
+`text-generation-server` lets you download the model with `download-weights` command like below 👇
 
 ```bash
 text-generation-server download-weights MODEL_HUB_ID
 ```
 
-You can also use it to quantize models like below 👇 
+You can also use it to quantize models like below 👇
 
 ```bash
-text-generation-server quantize MODEL_HUB_ID OUTPUT_DIR 
+text-generation-server quantize MODEL_HUB_ID OUTPUT_DIR
 ```
 
-You can use `text-generation-launcher` to serve models. 
+You can use `text-generation-launcher` to serve models.
 
 ```bash
 text-generation-launcher --model-id MODEL_HUB_ID --port 8080
 ```
 
-There are many options and parameters you can pass to `text-generation-launcher`. The documentation for CLI is kept minimal and intended to rely on self-generating documentation, which can be found by running 
+There are many options and parameters you can pass to `text-generation-launcher`. The documentation for CLI is kept minimal and intended to rely on self-generating documentation, which can be found by running
 
 ```bash
 text-generation-launcher --help
-``` 
+```
 
 You can also find it hosted in this [Swagger UI](https://huggingface.github.io/text-generation-inference/).
 
diff --git a/docs/source/conceptual/flash_attention.md b/docs/source/conceptual/flash_attention.md
index 1f3a6293..6b13cd13 100644
--- a/docs/source/conceptual/flash_attention.md
+++ b/docs/source/conceptual/flash_attention.md
@@ -1,12 +1,11 @@
 # Flash Attention
 
-Scaling the transformer architecture is heavily bottlenecked by the self-attention mechanism, which has quadratic time and memory complexity. Recent developments in accelerator hardware mainly focus on enhancing compute capacities and not memory and transferring data between hardware. This results in attention operation having a memory bottleneck. **Flash Attention** is an attention algorithm used to reduce this problem and scale transformer-based models more efficiently, enabling faster training and inference. 
+Scaling the transformer architecture is heavily bottlenecked by the self-attention mechanism, which has quadratic time and memory complexity. Recent developments in accelerator hardware mainly focus on enhancing compute capacities and not memory and transferring data between hardware. This results in attention operation having a memory bottleneck. **Flash Attention** is an attention algorithm used to reduce this problem and scale transformer-based models more efficiently, enabling faster training and inference.
 
-Standard attention mechanism uses High Bandwidth Memory (HBM) to store, read and write keys, queries and values. HBM is large in memory, but slow in processing, meanwhile SRAM is smaller in memory, but faster in operations. In the standard attention implementation, the cost of loading and writing keys, queries, and values from HBM is high. It loads keys, queries, and values from HBM to GPU on-chip SRAM, performs a single step of the attention mechanism, writes it back to HBM, and repeats this for every single attention step. Instead, Flash Attention loads keys, queries, and values once, fuses the operations of the attention mechanism, and writes them back. 
+Standard attention mechanism uses High Bandwidth Memory (HBM) to store, read and write keys, queries and values. HBM is large in memory, but slow in processing, meanwhile SRAM is smaller in memory, but faster in operations. In the standard attention implementation, the cost of loading and writing keys, queries, and values from HBM is high. It loads keys, queries, and values from HBM to GPU on-chip SRAM, performs a single step of the attention mechanism, writes it back to HBM, and repeats this for every single attention step. Instead, Flash Attention loads keys, queries, and values once, fuses the operations of the attention mechanism, and writes them back.
 
 ![Flash Attention](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/flash-attn.png)
 
 It is implemented for supported models. You can check out the complete list of models that support Flash Attention [here](https://github.com/huggingface/text-generation-inference/tree/main/server/text_generation_server/models), for models with flash prefix.
 
 You can learn more about Flash Attention by reading the paper in this [link](https://arxiv.org/abs/2205.14135).
-
diff --git a/docs/source/conceptual/guidance.md b/docs/source/conceptual/guidance.md
new file mode 100644
index 00000000..0a3bbd60
--- /dev/null
+++ b/docs/source/conceptual/guidance.md
@@ -0,0 +1,419 @@
+# Guidance
+
+Text Generation Inference (TGI) now supports [JSON and regex grammars](#grammar-and-constraints) and [tools and functions](#tools-and-functions) to help developers guide LLM responses to fit their needs.
+
+These features are available starting from version `1.4.3`. They are accessible via the [text_generation](https://pypi.org/project/text-generation/) library and are compatible with OpenAI's client libraries. The following guide will walk you through the new features and how to use them!
+
+## Quick Start
+
+Before we jump into the deep end, ensure your system is using TGI version `1.4.3` or later to access all the features we're about to explore in this guide.
+
+If you're not up to date, grab the latest version and let's get started!
+
+## Table of Contents 📚
+
+### Grammar and Constraints
+
+- [The Grammar Parameter](#the-grammar-parameter): Shape your AI's responses with precision.
+- [Constrain with Pydantic](#constrain-with-pydantic): Define a grammar using Pydantic models.
+- [JSON Schema Integration](#json-schema-integration): Fine-grained control over your requests via JSON schema.
+- [Using the client](#using-the-client): Use TGI's client libraries to shape the AI's responses.
+
+### Tools and Functions
+
+- [The Tools Parameter](#the-tools-parameter): Enhance the AI's capabilities with predefined functions.
+- [Via the client](#text-generation-inference-client): Use TGI's client libraries to interact with the Messages API and Tool functions.
+- [OpenAI integration](#openai-integration): Use OpenAI's client libraries to interact with TGI's Messages API and Tool functions.
+
+## Grammar and Constraints 🛣️
+
+### The Grammar Parameter
+
+In TGI `1.4.3`, we've introduced the grammar parameter, which allows you to specify the format of the response you want from the AI. This is a game-changer for those who need precise control over the AI's output.
+
+Using curl, you can make a request to TGI's `/generate` endpoint with the grammar parameter. This is the most primitive way to interact with the API, and using [Pydantic](#constrain-with-pydantic) is recommended for ease of use and readability.
+
+```json
+curl localhost:3000/generate \
+    -X POST \
+    -H 'Content-Type: application/json' \
+    -d '{
+    "inputs": "I saw a puppy a cat and a raccoon during my bike ride in the park",
+    "parameters": {
+        "repetition_penalty": 1.3,
+        "grammar": {
+            "type": "json",
+            "value": {
+                "properties": {
+                    "location": {
+                        "type": "string"
+                    },
+                    "activity": {
+                        "type": "string"
+                    },
+                    "animals_seen": {
+                        "type": "integer",
+                        "minimum": 1,
+                        "maximum": 5
+                    },
+                    "animals": {
+                        "type": "array",
+                        "items": {
+                            "type": "string"
+                        }
+                    }
+                },
+                "required": ["location", "activity", "animals_seen", "animals"]
+            }
+        }
+    }
+}'
+// {"generated_text":"{ \n\n\"activity\": \"biking\",\n\"animals\": [\"puppy\",\"cat\",\"raccoon\"],\n\"animals_seen\": 3,\n\"location\": \"park\"\n}"}
+
+```
+
+A grammar can be defined using Pydantic models, JSON schema, or regular expressions. The AI will then generate a response that conforms to the specified grammar.
+
+> Note: A grammar must compile to an intermediate representation to constrain the output. Grammar compilation is computationally expensive and may take a few seconds to complete on the first request. Subsequent requests will use the cached grammar and will be much faster.
+
+### Constrain with Pydantic
+
+Pydantic is a powerful library for data validation and settings management. It's the perfect tool for crafting a specific response format.
+
+Using Pydantic models we can define a similar grammar as the previous example in a shorter and more readable way.
+
+```python
+import requests
+from pydantic import BaseModel, conint
+from typing import List
+
+class Animals(BaseModel):
+    location: str
+    activity: str
+    animals_seen: conint(ge=1, le=5)  # Constrained integer type
+    animals: List[str]
+
+prompt = "convert to JSON: I saw a puppy a cat and a raccoon during my bike ride in the park"
+
+data = {
+    "inputs": prompt,
+    "parameters": {
+        "repetition_penalty": 1.3,
+        "grammar": {
+            "type": "json",
+            "value": Animals.schema()
+        }
+    }
+}
+
+headers = {
+    "Content-Type": "application/json",
+}
+
+response = requests.post(
+    'http://127.0.0.1:3000/generate',
+    headers=headers,
+    json=data
+)
+print(response.json())
+# {'generated_text': '{ "activity": "bike riding", "animals": ["puppy","cat","raccoon"],"animals_seen": 3, "location":"park" }'}
+
+```
+
+### JSON Schema Integration
+
+If Pydantic's not your style, go raw with direct JSON Schema integration. It's like having a conversation with the AI in its own language. This is similar to the first example but with programmatic control.
+
+```python
+import requests
+
+json_schema = {
+    "properties": {
+        "location": {
+            "type": "string"
+        },
+        "activity": {
+            "type": "string"
+        },
+        "animals_seen": {
+            "type": "integer",
+            "minimum": 1,
+            "maximum": 5
+        },
+        "animals": {
+            "type": "array",
+            "items": {
+                "type": "string"
+            }
+        }
+    },
+    "required": ["location", "activity", "animals_seen", "animals"]
+}
+
+data = {
+    "inputs": "[INST]convert to JSON: I saw a puppy a cat and a raccoon during my bike ride in the park [/INST]",
+    "parameters": {
+        "max_new_tokens": 200,
+        "repetition_penalty": 1.3,
+        "grammar": {
+            "type": "json",
+            "value": json_schema
+        }
+    }
+}
+
+headers = {
+    "Content-Type": "application/json",
+}
+
+response = requests.post(
+    'http://127.0.0.1:3000/generate',
+    headers=headers,
+    json=data
+)
+print(response.json())
+# {'generated_text': '{\n"activity": "biking",\n"animals": ["puppy","cat","raccoon"]\n  , "animals_seen": 3,\n   "location":"park"}'}
+
+```
+
+### Using the client
+
+TGI provides a client library that makes it easy to send requests with all of the parameters we've discussed above. Here's an example of how to use the client to send a request with a grammar parameter.
+
+```python
+from text_generation import AsyncClient
+from text_generation.types import GrammarType
+
+# NOTE: tools defined above and removed for brevity
+
+# Define an async function to encapsulate the async operation
+async def main():
+    client = AsyncClient(base_url="http://localhost:3000")
+
+    # Use 'await' to wait for the async method 'generate' to complete
+    response = await client.generate(
+        "Whats Googles DNS",
+        max_new_tokens=10,
+        decoder_input_details=True,
+        seed=1,
+        grammar={
+            "type": GrammarType.Regex,
+            "value": "((25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)",
+        },
+    )
+
+    # Once the response is received, you can process it
+    print(response.generated_text)
+
+# Ensure the main async function is run in the event loop
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(main())
+
+# 118.8.0.84
+
+```
+
+## Tools and Functions 🛠️
+
+### The Tools Parameter
+
+In addition to the grammar parameter, we've also introduced a set of tools and functions to help you get the most out of the Messages API.
+
+Tools are a set of user defined functions that can be used in tandem with the chat functionality to enhance the AI's capabilities. You can use these tools to perform a variety of tasks, such as data manipulation, formatting, and more.
+
+Functions, similar to grammars, are defined as JSON schema and can be passed as part of the parameters to the Messages API.
+
+```json
+curl localhost:3000/v1/chat/completions \
+    -X POST \
+    -H 'Content-Type: application/json' \
+    -d '{
+    "model": "tgi",
+    "messages": [
+        {
+            "role": "user",
+            "content": "What is the weather like in New York?"
+        }
+    ],
+    "tools": [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA"
+                        },
+                        "format": {
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                            "description": "The temperature unit to use. Infer this from the users location."
+                        }
+                    },
+                    "required": ["location", "format"]
+                }
+            }
+        }
+    ],
+    "tool_choice": "get_current_weather"
+}'
+// {"id":"","object":"text_completion","created":1709051640,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-native","choices":[{"index":0,"message":{"role":"assistant","tool_calls":{"id":0,"type":"function","function":{"description":null,"name":"tools","parameters":{"format":"celsius","location":"New York"}}}},"logprobs":null,"finish_reason":"eos_token"}],"usage":{"prompt_tokens":157,"completion_tokens":19,"total_tokens":176}}
+```
+
+<details>
+  <summary>Tools used in example below</summary>
+
+  ```python
+    tools = [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_current_weather",
+                "description": "Get the current weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        },
+                        "format": {
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                            "description": "The temperature unit to use. Infer this from the users location.",
+                        },
+                    },
+                    "required": ["location", "format"],
+                },
+            },
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_n_day_weather_forecast",
+                "description": "Get an N-day weather forecast",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city and state, e.g. San Francisco, CA",
+                        },
+                        "format": {
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                            "description": "The temperature unit to use. Infer this from the users location.",
+                        },
+                        "num_days": {
+                            "type": "integer",
+                            "description": "The number of days to forecast",
+                        },
+                    },
+                    "required": ["location", "format", "num_days"],
+                },
+            },
+        }
+    ]
+  ```
+
+</details>
+
+### Text Generation Inference Client
+
+TGI provides a client library to interact with the Messages API and Tool functions. The client library is available in both synchronous and asynchronous versions.
+
+```python
+from text_generation import AsyncClient
+
+# NOTE: tools defined above and removed for brevity
+
+# Define an async function to encapsulate the async operation
+async def main():
+    client = AsyncClient(base_url="http://localhost:3000")
+
+    # Use 'await' to wait for the async method 'chat' to complete
+    response = await client.chat(
+        max_tokens=100,
+        seed=1,
+        tools=tools,
+        presence_penalty=-1.1,
+        messages=[
+            {
+                "role": "system",
+                "content": "You're a helpful assistant! Answer the users question best you can.",
+            },
+            {
+                "role": "user",
+                "content": "What is the weather like in Brooklyn, New York?",
+            },
+        ],
+    )
+
+    # Once the response is received, you can process it
+    print(response.choices[0].message.tool_calls)
+
+# Ensure the main async function is run in the event loop
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(main())
+
+# {"id":"","object":"text_completion","created":1709051942,"model":"HuggingFaceH4/zephyr-7b-beta","system_fingerprint":"1.4.3-native","choices":[{"index":0,"message":{"role":"assistant","tool_calls":{"id":0,"type":"function","function":{"description":null,"name":"tools","parameters":{"format":"celsius","location":"New York"}}}},"logprobs":null,"finish_reason":"eos_token"}],"usage":{"prompt_tokens":157,"completion_tokens":20,"total_tokens":177}}
+
+```
+
+### OpenAI integration
+
+TGI exposes an OpenAI-compatible API, which means you can use OpenAI's client libraries to interact with TGI's Messages API and Tool functions.
+
+However, there are some minor differences in the API. For example, `tool_choice="auto"` will ALWAYS choose the tool for you. This is different from OpenAI's API, where `tool_choice="auto"` will choose a tool only if the model thinks it's necessary.
+
+```python
+from openai import OpenAI
+
+# Initialize the client, pointing it to one of the available models
+client = OpenAI(
+    base_url="http://localhost:3000/v1",
+    api_key="_",
+)
+
+# NOTE: tools defined above and removed for brevity
+
+chat_completion = client.chat.completions.create(
+    model="tgi",
+    messages=[
+        {
+            "role": "system",
+            "content": "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous.",
+        },
+        {
+            "role": "user",
+            "content": "What's the weather like the next 3 days in San Francisco, CA?",
+        },
+    ],
+    tools=tools,
+    tool_choice="auto",  # tool selected by model
+    max_tokens=500,
+)
+
+
+called = chat_completion.choices[0].message.tool_calls
+print(called)
+# {
+#     "id": 0,
+#     "type": "function",
+#     "function": {
+#         "description": None,
+#         "name": "tools",
+#         "parameters": {
+#             "format": "celsius",
+#             "location": "San Francisco, CA",
+#             "num_days": 3,
+#         },
+#     },
+# }
+```
diff --git a/docs/source/conceptual/quantization.md b/docs/source/conceptual/quantization.md
index 9bd77b93..8f26fdba 100644
--- a/docs/source/conceptual/quantization.md
+++ b/docs/source/conceptual/quantization.md
@@ -4,20 +4,20 @@ TGI offers GPTQ and bits-and-bytes quantization to quantize large language model
 
 ## Quantization with GPTQ
 
-GPTQ is a post-training quantization method to make the model smaller. It quantizes the layers by finding a compressed version of that weight, that will yield a minimum mean squared error like below 👇 
+GPTQ is a post-training quantization method to make the model smaller. It quantizes the layers by finding a compressed version of that weight, that will yield a minimum mean squared error like below 👇
 
 Given a layer \\(l\\) with weight matrix \\(W_{l}\\) and layer input \\(X_{l}\\), find quantized weight \\(\\hat{W}_{l}\\):
 
 $$({\hat{W}_{l}}^{*} = argmin_{\hat{W_{l}}} ||W_{l}X-\hat{W}_{l}X||^{2}_{2})$$
 
 
-TGI allows you to both run an already GPTQ quantized model (see available models [here](https://huggingface.co/models?search=gptq)) or quantize a model of your choice using quantization script. You can run a quantized model by simply passing --quantize like below 👇 
+TGI allows you to both run an already GPTQ quantized model (see available models [here](https://huggingface.co/models?search=gptq)) or quantize a model of your choice using quantization script. You can run a quantized model by simply passing --quantize like below 👇
 
 ```bash
 docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id $model --quantize gptq
 ```
 
-Note that TGI's GPTQ implementation doesn't use [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) under the hood. However, models quantized using AutoGPTQ or Optimum can still be served by TGI. 
+Note that TGI's GPTQ implementation doesn't use [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ) under the hood. However, models quantized using AutoGPTQ or Optimum can still be served by TGI.
 
 To quantize a given model using GPTQ with a calibration dataset, simply run
 
@@ -41,7 +41,7 @@ You can learn more about GPTQ from the [paper](https://arxiv.org/pdf/2210.17323.
 
 bitsandbytes is a library used to apply 8-bit and 4-bit quantization to models. Unlike GPTQ quantization, bitsandbytes doesn't require a calibration dataset or any post-processing – weights are automatically quantized on load. However, inference with bitsandbytes is slower than GPTQ or FP16 precision.
 
-8-bit quantization enables multi-billion parameter scale models to fit in smaller hardware without degrading performance too much. 
+8-bit quantization enables multi-billion parameter scale models to fit in smaller hardware without degrading performance too much.
 In TGI, you can use 8-bit quantization by adding `--quantize bitsandbytes` like below 👇
 
 ```bash
@@ -50,7 +50,7 @@ docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingf
 
 4-bit quantization is also possible with bitsandbytes. You can choose one of the following 4-bit data types: 4-bit float (`fp4`), or 4-bit `NormalFloat` (`nf4`). These data types were introduced in the context of parameter-efficient fine-tuning, but you can apply them for inference by automatically converting the model weights on load.
 
-In TGI, you can use 4-bit quantization by adding `--quantize bitsandbytes-nf4` or `--quantize bitsandbytes-fp4` like below 👇 
+In TGI, you can use 4-bit quantization by adding `--quantize bitsandbytes-nf4` or `--quantize bitsandbytes-fp4` like below 👇
 
 ```bash
 docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:latest --model-id $model --quantize bitsandbytes-nf4
diff --git a/docs/source/conceptual/safetensors.md b/docs/source/conceptual/safetensors.md
index fcc31bac..8ede20fe 100644
--- a/docs/source/conceptual/safetensors.md
+++ b/docs/source/conceptual/safetensors.md
@@ -1,7 +1,7 @@
 # Safetensors
 
-Safetensors is a model serialization format for deep learning models. It is [faster](https://huggingface.co/docs/safetensors/speed) and safer compared to other serialization formats like pickle (which is used under the hood in many deep learning libraries). 
+Safetensors is a model serialization format for deep learning models. It is [faster](https://huggingface.co/docs/safetensors/speed) and safer compared to other serialization formats like pickle (which is used under the hood in many deep learning libraries).
 
-TGI depends on safetensors format mainly to enable [tensor parallelism sharding](./tensor_parallelism). For a given model repository during serving, TGI looks for safetensors weights. If there are no safetensors weights, TGI converts the PyTorch weights to safetensors format. 
+TGI depends on safetensors format mainly to enable [tensor parallelism sharding](./tensor_parallelism). For a given model repository during serving, TGI looks for safetensors weights. If there are no safetensors weights, TGI converts the PyTorch weights to safetensors format.
 
-You can learn more about safetensors by reading the [safetensors documentation](https://huggingface.co/docs/safetensors/index).
\ No newline at end of file
+You can learn more about safetensors by reading the [safetensors documentation](https://huggingface.co/docs/safetensors/index).
diff --git a/docs/source/conceptual/speculation.md b/docs/source/conceptual/speculation.md
new file mode 100644
index 00000000..071b7b68
--- /dev/null
+++ b/docs/source/conceptual/speculation.md
@@ -0,0 +1,48 @@
+## Speculation
+
+Speculative decoding, assisted generation, Medusa, and others are a few different names for the same idea.
+The idea is to generate tokens *before* the large model actually runs, and only *check* if those tokens were valid.
+
+So you are making *more* computations on your LLM, but if you are correct you produce 1, 2, 3, etc. tokens in a single LLM pass. Since LLMs are usually memory bound (and not compute bound), provided your guesses are correct enough, this gives 2-3x faster inference (it can be much more for code-oriented tasks, for instance).
+
+You can check a more [detailed explanation](https://huggingface.co/blog/assisted-generation).
+
+Text Generation Inference supports 2 main speculative methods:
+
+- Medusa
+- N-gram
+
+
+### Medusa
+
+
+Medusa is a [simple method](https://arxiv.org/abs/2401.10774) to create many tokens in a single pass using fine-tuned LM heads in addition to your existing models.
+
+
+You can check a few existing fine-tunes for popular models:
+
+- [text-generation-inference/gemma-7b-it-medusa](https://huggingface.co/text-generation-inference/gemma-7b-it-medusa)
+- [text-generation-inference/Mixtral-8x7B-Instruct-v0.1-medusa](https://huggingface.co/text-generation-inference/Mixtral-8x7B-Instruct-v0.1-medusa)
+- [text-generation-inference/Mistral-7B-Instruct-v0.2-medusa](https://huggingface.co/text-generation-inference/Mistral-7B-Instruct-v0.2-medusa)
+
+
+In order to create your own medusa heads for your own finetune, you should check out the original Medusa repo: [https://github.com/FasterDecoding/Medusa](https://github.com/FasterDecoding/Medusa)
+
+
+In order to use medusa models in TGI, simply point to a medusa enabled model, and everything will load automatically.
+
+
+### N-gram
+
+
+If you don't have a medusa model, or don't have the resource to fine-tune, you can try to use `n-gram`.
+N-gram works by trying to find existing tokens in the previous sequence that match, and uses those as speculation.
+
+This is an extremely simple method, which works best for code, or highly repetitive text. This might not be beneficial, if the speculation misses too much.
+
+
+In order to enable n-gram speculation simply use
+
+`--speculate 2` in your flags.
+
+[Details about the flag](https://huggingface.co/docs/text-generation-inference/basic_tutorials/launcher#speculate)
diff --git a/docs/source/conceptual/streaming.md b/docs/source/conceptual/streaming.md
index b7e75c5f..505a0d9e 100644
--- a/docs/source/conceptual/streaming.md
+++ b/docs/source/conceptual/streaming.md
@@ -5,12 +5,12 @@
 Token streaming is the mode in which the server returns the tokens one by one as the model generates them. This enables showing progressive generations to the user rather than waiting for the whole generation. Streaming is an essential aspect of the end-user experience as it reduces latency, one of the most critical aspects of a smooth experience.
 
 <div class="flex justify-center">
-    <img 
-        class="block dark:hidden" 
+    <img
+        class="block dark:hidden"
         src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/streaming-generation-visual_360.gif"
     />
-    <img 
-        class="hidden dark:block" 
+    <img
+        class="hidden dark:block"
         src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/streaming-generation-visual-dark_360.gif"
     />
 </div>
@@ -25,14 +25,14 @@ With token streaming, the server can start returning the tokens one by one befor
 For example, a system can generate 100 tokens per second. If the system generates 1000 tokens, with the non-streaming setup, users need to wait 10 seconds to get results. On the other hand, with the streaming setup, users get initial results immediately, and although end-to-end latency will be the same, they can see half of the generation after five seconds. Below you can see an interactive demo that shows non-streaming vs streaming side-by-side. Click **generate** below.
 
 <div class="block dark:hidden">
-	<iframe 
+	<iframe
         src="https://osanseviero-streaming-vs-non-streaming.hf.space?__theme=light"
         width="850"
         height="350"
     ></iframe>
 </div>
 <div class="hidden dark:block">
-    <iframe 
+    <iframe
         src="https://osanseviero-streaming-vs-non-streaming.hf.space?__theme=dark"
         width="850"
         height="350"
@@ -43,7 +43,7 @@ For example, a system can generate 100 tokens per second. If the system generate
 
 ### Streaming with Python
 
-To stream tokens with `InferenceClient`, simply pass `stream=True` and iterate over the response. 
+To stream tokens with `InferenceClient`, simply pass `stream=True` and iterate over the response.
 
 ```python
 from huggingface_hub import InferenceClient
@@ -116,7 +116,7 @@ curl -N 127.0.0.1:8080/generate_stream \
 First, we need to install the `@huggingface/inference` library.
 `npm install @huggingface/inference`
 
-If you're using the free Inference API, you can use `HfInference`. If you're using inference endpoints, you can use `HfInferenceEndpoint`. Let's 
+If you're using the free Inference API, you can use `HfInference`. If you're using inference endpoints, you can use `HfInferenceEndpoint`. Let's
 
 We can create a `HfInferenceEndpoint` providing our endpoint URL and credential.
 
@@ -129,7 +129,7 @@ const hf = new HfInferenceEndpoint('https://YOUR_ENDPOINT.endpoints.huggingface.
 const prompt = 'What can you do in Nuremberg, Germany? Give me 3 Tips'
 
 const stream = hf.textGenerationStream({ inputs: prompt })
-for await (const r of stream) { 
+for await (const r of stream) {
   // yield the generated token
   process.stdout.write(r.token.text)
 }
diff --git a/docs/source/conceptual/tensor_parallelism.md b/docs/source/conceptual/tensor_parallelism.md
index 886a349a..2c241c41 100644
--- a/docs/source/conceptual/tensor_parallelism.md
+++ b/docs/source/conceptual/tensor_parallelism.md
@@ -1,6 +1,6 @@
 # Tensor Parallelism
 
-Tensor parallelism is a technique used to fit a large model in multiple GPUs. For example, when multiplying the input tensors with the first weight tensor, the matrix multiplication is equivalent to splitting the weight tensor column-wise, multiplying each column with the input separately, and then concatenating the separate outputs. These outputs are then transferred from the GPUs and concatenated together to get the final result, like below 👇 
+Tensor parallelism is a technique used to fit a large model in multiple GPUs. For example, when multiplying the input tensors with the first weight tensor, the matrix multiplication is equivalent to splitting the weight tensor column-wise, multiplying each column with the input separately, and then concatenating the separate outputs. These outputs are then transferred from the GPUs and concatenated together to get the final result, like below 👇
 
 ![Image courtesy of Anton Lozkhov](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/tgi/TP.png)
 
diff --git a/docs/source/installation.md b/docs/source/installation.md
index 1301b930..3e62102d 100644
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@@ -4,7 +4,7 @@ This section explains how to install the CLI tool as well as installing TGI from
 
 ## Install CLI
 
-You can use TGI command-line interface (CLI) to download weights, serve and quantize models, or get information on serving parameters. 
+You can use TGI command-line interface (CLI) to download weights, serve and quantize models, or get information on serving parameters.
 
 To install the CLI, you need to first clone the TGI repository and then run `make`.
 
@@ -23,7 +23,7 @@ BUILD_EXTENSIONS=True make install
 
 Before you start, you will need to setup your environment, and install Text Generation Inference. Text Generation Inference is tested on **Python 3.9+**.
 
-Text Generation Inference is available on pypi, conda and GitHub. 
+Text Generation Inference is available on pypi, conda and GitHub.
 
 To install and launch locally, first [install Rust](https://rustup.rs/) and create a Python virtual environment with at least
 Python 3.9, e.g. using conda:
diff --git a/docs/source/messages_api.md b/docs/source/messages_api.md
new file mode 100644
index 00000000..250aaae2
--- /dev/null
+++ b/docs/source/messages_api.md
@@ -0,0 +1,175 @@
+# Messages API
+
+Text Generation Inference (TGI) now supports the Messages API, which is fully compatible with the OpenAI Chat Completion API. This feature is available starting from version 1.4.0. You can use OpenAI's client libraries or third-party libraries expecting OpenAI schema to interact with TGI's Messages API. Below are some examples of how to utilize this compatibility.
+
+> **Note:** The Messages API is supported from TGI version 1.4.0 and above. Ensure you are using a compatible version to access this feature.
+
+#### Table of Contents
+
+- [Making a Request](#making-a-request)
+- [Streaming](#streaming)
+- [Synchronous](#synchronous)
+- [Hugging Face Inference Endpoints](#hugging-face-inference-endpoints)
+- [Cloud Providers](#cloud-providers)
+  - [Amazon SageMaker](#amazon-sagemaker)
+
+## Making a Request
+
+You can make a request to TGI's Messages API using `curl`. Here's an example:
+
+```bash
+curl localhost:3000/v1/chat/completions \
+    -X POST \
+    -d '{
+  "model": "tgi",
+  "messages": [
+    {
+      "role": "system",
+      "content": "You are a helpful assistant."
+    },
+    {
+      "role": "user",
+      "content": "What is deep learning?"
+    }
+  ],
+  "stream": true,
+  "max_tokens": 20
+}' \
+    -H 'Content-Type: application/json'
+```
+
+## Streaming
+
+You can also use OpenAI's Python client library to make a streaming request. Here's how:
+
+```python
+from openai import OpenAI
+
+# init the client but point it to TGI
+client = OpenAI(
+    base_url="http://localhost:3000/v1",
+    api_key="-"
+)
+
+chat_completion = client.chat.completions.create(
+    model="tgi",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant." },
+        {"role": "user", "content": "What is deep learning?"}
+    ],
+    stream=True
+)
+
+# iterate and print stream
+for message in chat_completion:
+    print(message)
+```
+
+## Synchronous
+
+If you prefer to make a synchronous request, you can do so like this:
+
+```python
+from openai import OpenAI
+
+# init the client but point it to TGI
+client = OpenAI(
+    base_url="http://localhost:3000/v1",
+    api_key="-"
+)
+
+chat_completion = client.chat.completions.create(
+    model="tgi",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant." },
+        {"role": "user", "content": "What is deep learning?"}
+    ],
+    stream=False
+)
+
+print(chat_completion)
+```
+
+## Hugging Face Inference Endpoints
+
+The Messages API is integrated with [Inference Endpoints](https://huggingface.co/inference-endpoints/dedicated).
+Every endpoint that uses "Text Generation Inference" with an LLM that has a chat template can now be used. Below is an example of how to use IE with TGI using OpenAI's Python client library:
+
+> **Note:** Make sure to replace `base_url` with your endpoint URL and to include `v1/` at the end of the URL. The `api_key` should be replaced with your Hugging Face API key.
+
+```python
+from openai import OpenAI
+
+# init the client but point it to TGI
+client = OpenAI(
+    # replace with your endpoint url, make sure to include "v1/" at the end
+    base_url="https://vlzz10eq3fol3429.us-east-1.aws.endpoints.huggingface.cloud/v1/",
+    # replace with your API key
+    api_key="hf_XXX"
+)
+
+chat_completion = client.chat.completions.create(
+    model="tgi",
+    messages=[
+        {"role": "system", "content": "You are a helpful assistant." },
+        {"role": "user", "content": "What is deep learning?"}
+    ],
+    stream=True
+)
+
+# iterate and print stream
+for message in chat_completion:
+    print(message.choices[0].delta.content, end="")
+```
+
+## Cloud Providers
+
+TGI can be deployed on various cloud providers for scalable and robust text generation. One such provider is Amazon SageMaker, which has recently added support for TGI. Here's how you can deploy TGI on Amazon SageMaker:
+
+### Amazon SageMaker
+
+To enable the Messages API in Amazon SageMaker you need to set the environment variable `MESSAGES_API_ENABLED=true`.
+
+This will modify the `/invocations` route to accept Messages dictionaries consisting of role and content. See the example below on how to deploy Llama with the new Messages API.
+
+```python
+import json
+import sagemaker
+import boto3
+from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
+
+try:
+ role = sagemaker.get_execution_role()
+except ValueError:
+ iam = boto3.client('iam')
+ role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']
+
+# Hub Model configuration. https://huggingface.co/models
+hub = {
+ 'HF_MODEL_ID':'HuggingFaceH4/zephyr-7b-beta',
+ 'SM_NUM_GPUS': json.dumps(1),
+ 'MESSAGES_API_ENABLED': True
+}
+
+# create Hugging Face Model Class
+huggingface_model = HuggingFaceModel(
+ image_uri=get_huggingface_llm_image_uri("huggingface",version="1.4.0"),
+ env=hub,
+ role=role,
+)
+
+# deploy model to SageMaker Inference
+predictor = huggingface_model.deploy(
+ initial_instance_count=1,
+ instance_type="ml.g5.2xlarge",
+ container_startup_health_check_timeout=300,
+  )
+
+# send request
+predictor.predict({
+"messages": [
+        {"role": "system", "content": "You are a helpful assistant." },
+        {"role": "user", "content": "What is deep learning?"}
+    ]
+})
+```
diff --git a/docs/source/quicktour.md b/docs/source/quicktour.md
index 80561c27..70cf575c 100644
--- a/docs/source/quicktour.md
+++ b/docs/source/quicktour.md
@@ -2,23 +2,27 @@
 
 The easiest way of getting started is using the official Docker container. Install Docker following [their installation instructions](https://docs.docker.com/get-docker/).
 
-Let's say you want to deploy [Falcon-7B Instruct](https://huggingface.co/tiiuae/falcon-7b-instruct) model with TGI. Here is an example on how to do that:
+Let's say you want to deploy the [teknium/OpenHermes-2.5-Mistral-7B](https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B) model with TGI. Here is an example of how to do that:
 
 ```bash
-model=tiiuae/falcon-7b-instruct
+model=teknium/OpenHermes-2.5-Mistral-7B
 volume=$PWD/data # share a volume with the Docker container to avoid downloading weights every run
 
-docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.2 --model-id $model
+docker run --gpus all --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4 --model-id $model
 ```
 
 <Tip warning={true}>
 
-To use GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html)  . We also recommend using NVIDIA drivers with CUDA version 11.8 or higher.
-
-To use TGI on RoCm-enabled AMD GPUs (only MI210 and MI250 are tested), please use the image `ghcr.io/huggingface/text-generation-inference:1.2+rocm` instead. For details about the usage on RoCm, please refer to the [Supported Hardware section](./supported_models#supported-hardware) and [AMD documentation](https://rocm.docs.amd.com/en/latest/deploy/docker.html).
+To use NVIDIA GPUs, you need to install the [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). We also recommend using NVIDIA drivers with CUDA version 12.2 or higher.
 
 </Tip>
 
+TGI also supports ROCm-enabled AMD GPUs (only MI210 and MI250 are tested); details are available in the [Supported Hardware section](./supported_models#supported-hardware) and the [AMD documentation](https://rocm.docs.amd.com/en/latest/deploy/docker.html). To launch TGI on ROCm GPUs, please use the following command instead:
+
+```bash
+docker run --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --device=/dev/kfd --device=/dev/dri --group-add video --ipc=host --shm-size 1g -p 8080:80 -v $volume:/data ghcr.io/huggingface/text-generation-inference:1.4-rocm --model-id $model
+```
+
 Once TGI is running, you can use the `generate` endpoint by doing requests. To learn more about how to query the endpoints, check the [Consuming TGI](./basic_tutorials/consuming_tgi) section, where we show examples with utility libraries and UIs. Below you can see a simple snippet to query the endpoint.
 
 
@@ -49,7 +53,7 @@ print(response.json())
 ```js
 async function query() {
     const response = await fetch(
-        'http://127.0.0.1:8080/generate', 
+        'http://127.0.0.1:8080/generate',
         {
             method: 'POST',
             headers: { 'Content-Type': 'application/json'},
@@ -87,7 +91,7 @@ curl 127.0.0.1:8080/generate \
 To see all possible deploy flags and options, you can use the `--help` flag. It's possible to configure the number of shards, quantization, generation parameters, and more.
 
 ```bash
-docker run ghcr.io/huggingface/text-generation-inference:1.2 --help
+docker run ghcr.io/huggingface/text-generation-inference:1.4 --help
 ```
 
 </Tip>
diff --git a/docs/source/supported_models.md b/docs/source/supported_models.md
index d7d45b70..fa1f9f61 100644
--- a/docs/source/supported_models.md
+++ b/docs/source/supported_models.md
@@ -19,7 +19,11 @@ The following models are optimized and can be served with TGI, which uses custom
 - [MPT](https://huggingface.co/mosaicml/mpt-30b)
 - [Llama V2](https://huggingface.co/meta-llama)
 - [Code Llama](https://huggingface.co/codellama)
-- [Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1)
+- [Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2)
+- [Mixtral](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)
+- [Phi](https://huggingface.co/microsoft/phi-2)
+- [Idefics](https://huggingface.co/HuggingFaceM4/idefics-9b-instruct) (Multimodal)
+- [Llava-next](https://huggingface.co/llava-hf/llava-v1.6-mistral-7b-hf) (Multimodal)
 
 If the above list lacks the model you would like to serve, depending on the model's pipeline type, you can try to initialize and serve the model anyways to see how well it performs, but performance isn't guaranteed for non-optimized models:
 
@@ -39,9 +43,13 @@ text-generation-launcher --model-id <PATH-TO-LOCAL-BLOOM>
 
 ## Supported Hardware
 
-TGI optimized models are supported on NVIDIA [A100](https://www.nvidia.com/en-us/data-center/a100/), [A10G](https://www.nvidia.com/en-us/data-center/products/a10-gpu/) and [T4](https://www.nvidia.com/en-us/data-center/tesla-t4/) GPUs with CUDA 11.8+. Note that you have to install [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) to use it. For other NVIDIA GPUs, continuous batching will still apply, but some operations like flash attention and paged attention will not be executed. 
+TGI optimized models are supported on NVIDIA [A100](https://www.nvidia.com/en-us/data-center/a100/), [A10G](https://www.nvidia.com/en-us/data-center/products/a10-gpu/) and [T4](https://www.nvidia.com/en-us/data-center/tesla-t4/) GPUs with CUDA 12.2+. Note that you have to install [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) to use it. For other NVIDIA GPUs, continuous batching will still apply, but some operations like flash attention and paged attention will not be executed.
 
-TGI also has support of RoCm-enabled AMD Instinct MI210 and MI250 GPUs, with paged attention and flash attention v2 support. The following features are missing from the RoCm version of TGI: quantization and flash [layer norm kernel](https://github.com/Dao-AILab/flash-attention/tree/main/csrc/layer_norm).
+TGI also supports ROCm-enabled AMD Instinct MI210 and MI250 GPUs, with paged attention, GPTQ quantization, and flash attention v2 support. The following features are currently not supported in the ROCm version of TGI, and support may be extended in the future:
+* Loading [AWQ](https://huggingface.co/docs/transformers/quantization#awq) checkpoints.
+* Flash [layer norm kernel](https://github.com/Dao-AILab/flash-attention/tree/main/csrc/layer_norm)
+* Kernel for sliding window attention (Mistral)
 
 TGI is also supported on the following AI hardware accelerators:
-- *Habana first-gen Gaudi and Gaudi2:* check out this [example](https://github.com/huggingface/optimum-habana/tree/main/text-generation-inference) how to serve models with TGI on Gaudi and Gaudi2 with [Optimum Habana](https://huggingface.co/docs/optimum/habana/index)
+- *Habana first-gen Gaudi and Gaudi2:* check out this [repository](https://github.com/huggingface/tgi-gaudi) to serve models with TGI on Gaudi and Gaudi2 with [Optimum Habana](https://huggingface.co/docs/optimum/habana/index)
+- *AWS Inferentia2:* check out this [guide](https://github.com/huggingface/optimum-neuron/tree/main/text-generation-inference) on how to serve models with TGI on Inferentia2.
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index 35c8faae..e8ce0d84 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -16,7 +16,17 @@ from syrupy.extensions.json import JSONSnapshotExtension
 from aiohttp import ClientConnectorError, ClientOSError, ServerDisconnectedError
 
 from text_generation import AsyncClient
-from text_generation.types import Response, Details, InputToken, Token, BestOfSequence
+from text_generation.types import (
+    Response,
+    Details,
+    InputToken,
+    Token,
+    BestOfSequence,
+    Grammar,
+    ChatComplete,
+    ChatCompletionChunk,
+    ChatCompletionComplete,
+)
 
 DOCKER_IMAGE = os.getenv("DOCKER_IMAGE", None)
 HUGGING_FACE_HUB_TOKEN = os.getenv("HUGGING_FACE_HUB_TOKEN", None)
@@ -25,6 +35,7 @@ DOCKER_VOLUME = os.getenv("DOCKER_VOLUME", "/data")
 
 class ResponseComparator(JSONSnapshotExtension):
     rtol = 0.2
+
     def serialize(
         self,
         data,
@@ -32,8 +43,16 @@ class ResponseComparator(JSONSnapshotExtension):
         exclude=None,
         matcher=None,
     ):
+        if (
+            isinstance(data, Response)
+            or isinstance(data, ChatComplete)
+            or isinstance(data, ChatCompletionChunk)
+            or isinstance(data, ChatCompletionComplete)
+        ):
+            data = data.model_dump()
+
         if isinstance(data, List):
-            data = [d.dict() for d in data]
+            data = [d.model_dump() for d in data]
 
         data = self._filter(
             data=data, depth=0, path=(), exclude=exclude, matcher=matcher
@@ -48,6 +67,15 @@ class ResponseComparator(JSONSnapshotExtension):
     ) -> bool:
         def convert_data(data):
             data = json.loads(data)
+            if isinstance(data, Dict) and "choices" in data:
+                choices = data["choices"]
+                if (
+                    isinstance(choices, List)
+                    and len(choices) >= 1
+                    and "delta" in choices[0]
+                ):
+                    return ChatCompletionChunk(**data)
+                return ChatComplete(**data)
 
             if isinstance(data, Dict):
                 return Response(**data)
@@ -69,7 +97,9 @@ class ResponseComparator(JSONSnapshotExtension):
                     prefill_token.id == other.id
                     and prefill_token.text == other.text
                     and (
-                        math.isclose(prefill_token.logprob, other.logprob, rel_tol=self.rtol)
+                        math.isclose(
+                            prefill_token.logprob, other.logprob, rel_tol=self.rtol
+                        )
                         if prefill_token.logprob is not None
                         else prefill_token.logprob == other.logprob
                     )
@@ -131,6 +161,16 @@ class ResponseComparator(JSONSnapshotExtension):
                 )
             )
 
+        def eq_chat_complete(response: ChatComplete, other: ChatComplete) -> bool:
+            # Two completions match when the first choice's message text is equal.
+            ours, theirs = response.choices[0], other.choices[0]
+            return ours.message.content == theirs.message.content
+
+        def eq_chat_complete_chunk(
+            response: ChatCompletionChunk, other: ChatCompletionChunk
+        ) -> bool:  # chunks match when the first choice's delta content is equal
+            return response.choices[0].delta.content == other.choices[0].delta.content
+
         def eq_response(response: Response, other: Response) -> bool:
             return response.generated_text == other.generated_text and eq_details(
                 response.details, other.details
@@ -144,6 +184,19 @@ class ResponseComparator(JSONSnapshotExtension):
         if not isinstance(snapshot_data, List):
             snapshot_data = [snapshot_data]
 
+        if isinstance(serialized_data[0], ChatComplete):
+            return len(snapshot_data) == len(serialized_data) and all(
+                [eq_chat_complete(r, o) for r, o in zip(serialized_data, snapshot_data)]
+            )
+
+        if isinstance(serialized_data[0], ChatCompletionChunk):
+            return len(snapshot_data) == len(serialized_data) and all(
+                [
+                    eq_chat_complete_chunk(r, o)
+                    for r, o in zip(serialized_data, snapshot_data)
+                ]
+            )
+
         return len(snapshot_data) == len(serialized_data) and all(
             [eq_response(r, o) for r, o in zip(serialized_data, snapshot_data)]
         )
@@ -153,6 +206,7 @@ class GenerousResponseComparator(ResponseComparator):
     # Needed for GPTQ with exllama which has serious numerical fluctuations.
     rtol = 0.75
 
+
 class LauncherHandle:
     def __init__(self, port: int):
         self.client = AsyncClient(f"http://localhost:{port}")
@@ -198,6 +252,7 @@ class ProcessLauncherHandle(LauncherHandle):
 def response_snapshot(snapshot):
     return snapshot.use_extension(ResponseComparator)
 
+
 @pytest.fixture
 def generous_response_snapshot(snapshot):
     return snapshot.use_extension(GenerousResponseComparator)
@@ -219,7 +274,11 @@ def launcher(event_loop):
         quantize: Optional[str] = None,
         trust_remote_code: bool = False,
         use_flash_attention: bool = True,
-        dtype: Optional[str] = None
+        disable_grammar_support: bool = False,
+        dtype: Optional[str] = None,
+        revision: Optional[str] = None,
+        max_input_length: Optional[int] = None,
+        max_total_tokens: Optional[int] = None,
     ):
         port = random.randint(8000, 10_000)
         master_port = random.randint(10_000, 20_000)
@@ -242,6 +301,8 @@ def launcher(event_loop):
 
         env = os.environ
 
+        if disable_grammar_support:
+            args.append("--disable-grammar-support")
         if num_shard is not None:
             args.extend(["--num-shard", str(num_shard)])
         if quantize is not None:
@@ -250,8 +311,17 @@ def launcher(event_loop):
         if dtype is not None:
             args.append("--dtype")
             args.append(dtype)
+        if revision is not None:
+            args.append("--revision")
+            args.append(revision)
         if trust_remote_code:
             args.append("--trust-remote-code")
+        if max_input_length:
+            args.append("--max-input-length")
+            args.append(str(max_input_length))
+        if max_total_tokens:
+            args.append("--max-total-tokens")
+            args.append(str(max_total_tokens))
 
         env["LOG_LEVEL"] = "info,text_generation_router=debug"
 
@@ -282,12 +352,18 @@ def launcher(event_loop):
         quantize: Optional[str] = None,
         trust_remote_code: bool = False,
         use_flash_attention: bool = True,
-        dtype: Optional[str] = None
+        disable_grammar_support: bool = False,
+        dtype: Optional[str] = None,
+        revision: Optional[str] = None,
+        max_input_length: Optional[int] = None,
+        max_total_tokens: Optional[int] = None,
     ):
         port = random.randint(8000, 10_000)
 
         args = ["--model-id", model_id, "--env"]
 
+        if disable_grammar_support:
+            args.append("--disable-grammar-support")
         if num_shard is not None:
             args.extend(["--num-shard", str(num_shard)])
         if quantize is not None:
@@ -296,8 +372,17 @@ def launcher(event_loop):
         if dtype is not None:
             args.append("--dtype")
             args.append(dtype)
+        if revision is not None:
+            args.append("--revision")
+            args.append(revision)
         if trust_remote_code:
             args.append("--trust-remote-code")
+        if max_input_length:
+            args.append("--max-input-length")
+            args.append(str(max_input_length))
+        if max_total_tokens:
+            args.append("--max-total-tokens")
+            args.append(str(max_total_tokens))
 
         client = docker.from_env()
 
@@ -312,7 +397,9 @@ def launcher(event_loop):
 
         gpu_count = num_shard if num_shard is not None else 1
 
-        env = {"LOG_LEVEL": "info,text_generation_router=debug"}
+        env = {
+            "LOG_LEVEL": "info,text_generation_router=debug",
+        }
         if not use_flash_attention:
             env["USE_FLASH_ATTENTION"] = "false"
 
@@ -335,7 +422,7 @@ def launcher(event_loop):
             ],
             volumes=volumes,
             ports={"80/tcp": port},
-            shm_size="1G"
+            shm_size="1G",
         )
 
         yield ContainerLauncherHandle(client, container.name, port)
@@ -362,11 +449,22 @@ def launcher(event_loop):
 @pytest.fixture(scope="module")
 def generate_load():
     async def generate_load_inner(
-        client: AsyncClient, prompt: str, max_new_tokens: int, n: int
+        client: AsyncClient,
+        prompt: str,
+        max_new_tokens: int,
+        n: int,
+        seed: Optional[int] = None,
+        grammar: Optional[Grammar] = None,
+        stop_sequences: Optional[List[str]] = None,
     ) -> List[Response]:
         futures = [
             client.generate(
-                prompt, max_new_tokens=max_new_tokens, decoder_input_details=True
+                prompt,
+                max_new_tokens=max_new_tokens,
+                decoder_input_details=True,
+                seed=seed,
+                grammar=grammar,
+                stop_sequences=stop_sequences,
             )
             for _ in range(n)
         ]
diff --git a/integration-tests/images/chicken_on_money.png b/integration-tests/images/chicken_on_money.png
new file mode 100644
index 00000000..1a4e0440
Binary files /dev/null and b/integration-tests/images/chicken_on_money.png differ
diff --git a/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json
new file mode 100644
index 00000000..80f0d053
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json
@@ -0,0 +1,89 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 2,
+        "logprob": null,
+        "text": "<bos>"
+      },
+      {
+        "id": 2015,
+        "logprob": -10.0,
+        "text": "Test"
+      },
+      {
+        "id": 3853,
+        "logprob": -10.875,
+        "text": " request"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 1736,
+        "logprob": -2.09375,
+        "special": false,
+        "text": " form"
+      },
+      {
+        "id": 109,
+        "logprob": -1.8671875,
+        "special": false,
+        "text": "\n\n"
+      },
+      {
+        "id": 651,
+        "logprob": -2.4375,
+        "special": false,
+        "text": "The"
+      },
+      {
+        "id": 2121,
+        "logprob": -1.8203125,
+        "special": false,
+        "text": " test"
+      },
+      {
+        "id": 3853,
+        "logprob": -0.23242188,
+        "special": false,
+        "text": " request"
+      },
+      {
+        "id": 1736,
+        "logprob": -0.08544922,
+        "special": false,
+        "text": " form"
+      },
+      {
+        "id": 603,
+        "logprob": -0.9375,
+        "special": false,
+        "text": " is"
+      },
+      {
+        "id": 1671,
+        "logprob": -1.671875,
+        "special": false,
+        "text": " used"
+      },
+      {
+        "id": 577,
+        "logprob": -0.40429688,
+        "special": false,
+        "text": " to"
+      },
+      {
+        "id": 3853,
+        "logprob": -1.1875,
+        "special": false,
+        "text": " request"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": " form\n\nThe test request form is used to request"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_all_params.json b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_all_params.json
new file mode 100644
index 00000000..8253dc96
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_all_params.json
@@ -0,0 +1,89 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 2,
+        "logprob": null,
+        "text": "<bos>"
+      },
+      {
+        "id": 2015,
+        "logprob": -10.0,
+        "text": "Test"
+      },
+      {
+        "id": 3853,
+        "logprob": -10.875,
+        "text": " request"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 7539,
+        "logprob": -0.73046875,
+        "special": false,
+        "text": " forms"
+      },
+      {
+        "id": 708,
+        "logprob": 0.0,
+        "special": false,
+        "text": " are"
+      },
+      {
+        "id": 671,
+        "logprob": -1.703125,
+        "special": false,
+        "text": " an"
+      },
+      {
+        "id": 8727,
+        "logprob": 0.0,
+        "special": false,
+        "text": " essential"
+      },
+      {
+        "id": 1702,
+        "logprob": 0.0,
+        "special": false,
+        "text": " part"
+      },
+      {
+        "id": 576,
+        "logprob": 0.0,
+        "special": false,
+        "text": " of"
+      },
+      {
+        "id": 573,
+        "logprob": 0.0,
+        "special": false,
+        "text": " the"
+      },
+      {
+        "id": 11859,
+        "logprob": -1.6953125,
+        "special": false,
+        "text": " lab"
+      },
+      {
+        "id": 2185,
+        "logprob": -1.3125,
+        "special": false,
+        "text": " process"
+      },
+      {
+        "id": 578,
+        "logprob": -1.5,
+        "special": false,
+        "text": " and"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "Test request forms are an essential part of the lab process and"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_load.json b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_load.json
new file mode 100644
index 00000000..e69ee25d
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_load.json
@@ -0,0 +1,358 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 2,
+          "logprob": null,
+          "text": "<bos>"
+        },
+        {
+          "id": 2015,
+          "logprob": -10.0,
+          "text": "Test"
+        },
+        {
+          "id": 3853,
+          "logprob": -10.875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1736,
+          "logprob": -2.09375,
+          "special": false,
+          "text": " form"
+        },
+        {
+          "id": 109,
+          "logprob": -1.9140625,
+          "special": false,
+          "text": "\n\n"
+        },
+        {
+          "id": 651,
+          "logprob": -2.453125,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 2121,
+          "logprob": -1.8984375,
+          "special": false,
+          "text": " test"
+        },
+        {
+          "id": 3853,
+          "logprob": -0.23535156,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 1736,
+          "logprob": -0.091308594,
+          "special": false,
+          "text": " form"
+        },
+        {
+          "id": 603,
+          "logprob": -0.96875,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 1671,
+          "logprob": -1.6484375,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 577,
+          "logprob": -0.43164062,
+          "special": false,
+          "text": " to"
+        },
+        {
+          "id": 3853,
+          "logprob": -1.2421875,
+          "special": false,
+          "text": " request"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": " form\n\nThe test request form is used to request"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 2,
+          "logprob": null,
+          "text": "<bos>"
+        },
+        {
+          "id": 2015,
+          "logprob": -10.0,
+          "text": "Test"
+        },
+        {
+          "id": 3853,
+          "logprob": -10.875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1736,
+          "logprob": -2.09375,
+          "special": false,
+          "text": " form"
+        },
+        {
+          "id": 109,
+          "logprob": -1.9140625,
+          "special": false,
+          "text": "\n\n"
+        },
+        {
+          "id": 651,
+          "logprob": -2.453125,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 2121,
+          "logprob": -1.8984375,
+          "special": false,
+          "text": " test"
+        },
+        {
+          "id": 3853,
+          "logprob": -0.23535156,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 1736,
+          "logprob": -0.091308594,
+          "special": false,
+          "text": " form"
+        },
+        {
+          "id": 603,
+          "logprob": -0.96875,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 1671,
+          "logprob": -1.6484375,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 577,
+          "logprob": -0.43164062,
+          "special": false,
+          "text": " to"
+        },
+        {
+          "id": 3853,
+          "logprob": -1.2421875,
+          "special": false,
+          "text": " request"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": " form\n\nThe test request form is used to request"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 2,
+          "logprob": null,
+          "text": "<bos>"
+        },
+        {
+          "id": 2015,
+          "logprob": -10.0,
+          "text": "Test"
+        },
+        {
+          "id": 3853,
+          "logprob": -10.875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1736,
+          "logprob": -2.09375,
+          "special": false,
+          "text": " form"
+        },
+        {
+          "id": 109,
+          "logprob": -1.9140625,
+          "special": false,
+          "text": "\n\n"
+        },
+        {
+          "id": 651,
+          "logprob": -2.453125,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 2121,
+          "logprob": -1.8984375,
+          "special": false,
+          "text": " test"
+        },
+        {
+          "id": 3853,
+          "logprob": -0.23535156,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 1736,
+          "logprob": -0.091308594,
+          "special": false,
+          "text": " form"
+        },
+        {
+          "id": 603,
+          "logprob": -0.96875,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 1671,
+          "logprob": -1.6484375,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 577,
+          "logprob": -0.43164062,
+          "special": false,
+          "text": " to"
+        },
+        {
+          "id": 3853,
+          "logprob": -1.2421875,
+          "special": false,
+          "text": " request"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": " form\n\nThe test request form is used to request"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 2,
+          "logprob": null,
+          "text": "<bos>"
+        },
+        {
+          "id": 2015,
+          "logprob": -10.0,
+          "text": "Test"
+        },
+        {
+          "id": 3853,
+          "logprob": -10.875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 1736,
+          "logprob": -2.09375,
+          "special": false,
+          "text": " form"
+        },
+        {
+          "id": 109,
+          "logprob": -1.9140625,
+          "special": false,
+          "text": "\n\n"
+        },
+        {
+          "id": 651,
+          "logprob": -2.453125,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 2121,
+          "logprob": -1.8984375,
+          "special": false,
+          "text": " test"
+        },
+        {
+          "id": 3853,
+          "logprob": -0.23535156,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 1736,
+          "logprob": -0.091308594,
+          "special": false,
+          "text": " form"
+        },
+        {
+          "id": 603,
+          "logprob": -0.96875,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 1671,
+          "logprob": -1.6484375,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 577,
+          "logprob": -0.43164062,
+          "special": false,
+          "text": " to"
+        },
+        {
+          "id": 3853,
+          "logprob": -1.2421875,
+          "special": false,
+          "text": " request"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": " form\n\nThe test request form is used to request"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar.json b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar.json
new file mode 100644
index 00000000..0e87f59e
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar.json
@@ -0,0 +1,89 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 4321,
+        "logprob": -13.90625,
+        "text": "Test"
+      },
+      {
+        "id": 2009,
+        "logprob": -12.328125,
+        "text": "request"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 13,
+        "logprob": -2.0566406,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 13,
+        "logprob": -1.5253906,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 29902,
+        "logprob": -2.7578125,
+        "special": false,
+        "text": "I"
+      },
+      {
+        "id": 4966,
+        "logprob": -1.9033203,
+        "special": false,
+        "text": " hope"
+      },
+      {
+        "id": 445,
+        "logprob": -0.5019531,
+        "special": false,
+        "text": " this"
+      },
+      {
+        "id": 6911,
+        "logprob": -0.21264648,
+        "special": false,
+        "text": " helps"
+      },
+      {
+        "id": 29991,
+        "logprob": -0.5991211,
+        "special": false,
+        "text": "!"
+      },
+      {
+        "id": 2803,
+        "logprob": -0.37475586,
+        "special": false,
+        "text": " Let"
+      },
+      {
+        "id": 592,
+        "logprob": -0.018463135,
+        "special": false,
+        "text": " me"
+      },
+      {
+        "id": 1073,
+        "logprob": -0.0008597374,
+        "special": false,
+        "text": " know"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "\n\nI hope this helps! Let me know"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_json.json b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_json.json
new file mode 100644
index 00000000..d7fb620d
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_json.json
@@ -0,0 +1,274 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "eos_token",
+    "generated_tokens": 30,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 5235,
+        "logprob": -10.0625,
+        "text": "info"
+      },
+      {
+        "id": 29901,
+        "logprob": -3.2324219,
+        "text": ":"
+      },
+      {
+        "id": 13260,
+        "logprob": -10.625,
+        "text": "dav"
+      },
+      {
+        "id": 333,
+        "logprob": -0.08276367,
+        "text": "id"
+      },
+      {
+        "id": 8753,
+        "logprob": -7.5273438,
+        "text": "hol"
+      },
+      {
+        "id": 17559,
+        "logprob": -3.8476562,
+        "text": "tz"
+      },
+      {
+        "id": 763,
+        "logprob": -10.140625,
+        "text": "like"
+      },
+      {
+        "id": 10697,
+        "logprob": -10.1953125,
+        "text": "trees"
+      },
+      {
+        "id": 322,
+        "logprob": -2.5742188,
+        "text": "and"
+      },
+      {
+        "id": 756,
+        "logprob": -7.4882812,
+        "text": "has"
+      },
+      {
+        "id": 1023,
+        "logprob": -5.0507812,
+        "text": "two"
+      },
+      {
+        "id": 274,
+        "logprob": -5.3164062,
+        "text": "c"
+      },
+      {
+        "id": 1446,
+        "logprob": -0.6694336,
+        "text": "ats"
+      },
+      {
+        "id": 29889,
+        "logprob": -0.9995117,
+        "text": "."
+      },
+      {
+        "id": 29871,
+        "logprob": -4.2421875,
+        "text": ""
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 6377,
+        "logprob": -0.14916992,
+        "special": false,
+        "text": "{\""
+      },
+      {
+        "id": 29888,
+        "logprob": -0.13598633,
+        "special": false,
+        "text": "f"
+      },
+      {
+        "id": 12935,
+        "logprob": -0.017669678,
+        "special": false,
+        "text": "irs"
+      },
+      {
+        "id": 29873,
+        "logprob": -0.00085639954,
+        "special": false,
+        "text": "t"
+      },
+      {
+        "id": 1170,
+        "logprob": -0.0054016113,
+        "special": false,
+        "text": "Name"
+      },
+      {
+        "id": 4710,
+        "logprob": -0.13549805,
+        "special": false,
+        "text": "\":\""
+      },
+      {
+        "id": 19504,
+        "logprob": -0.8852539,
+        "special": false,
+        "text": "David"
+      },
+      {
+        "id": 3284,
+        "logprob": -0.16394043,
+        "special": false,
+        "text": "\",\""
+      },
+      {
+        "id": 29882,
+        "logprob": -0.08862305,
+        "special": false,
+        "text": "h"
+      },
+      {
+        "id": 711,
+        "logprob": -0.66259766,
+        "special": false,
+        "text": "ob"
+      },
+      {
+        "id": 1609,
+        "logprob": -5.51939e-05,
+        "special": false,
+        "text": "by"
+      },
+      {
+        "id": 4710,
+        "logprob": -0.23120117,
+        "special": false,
+        "text": "\":\""
+      },
+      {
+        "id": 29911,
+        "logprob": -2.3730469,
+        "special": false,
+        "text": "T"
+      },
+      {
+        "id": 11003,
+        "logprob": -0.032104492,
+        "special": false,
+        "text": "rees"
+      },
+      {
+        "id": 3284,
+        "logprob": -0.22021484,
+        "special": false,
+        "text": "\",\""
+      },
+      {
+        "id": 4230,
+        "logprob": -0.06726074,
+        "special": false,
+        "text": "last"
+      },
+      {
+        "id": 1170,
+        "logprob": -0.003501892,
+        "special": false,
+        "text": "Name"
+      },
+      {
+        "id": 4710,
+        "logprob": -0.0045661926,
+        "special": false,
+        "text": "\":\""
+      },
+      {
+        "id": 29950,
+        "logprob": -0.12512207,
+        "special": false,
+        "text": "H"
+      },
+      {
+        "id": 14339,
+        "logprob": -0.009552002,
+        "special": false,
+        "text": "olt"
+      },
+      {
+        "id": 29920,
+        "logprob": -0.00042438507,
+        "special": false,
+        "text": "z"
+      },
+      {
+        "id": 3284,
+        "logprob": -0.11651611,
+        "special": false,
+        "text": "\",\""
+      },
+      {
+        "id": 29876,
+        "logprob": -0.29736328,
+        "special": false,
+        "text": "n"
+      },
+      {
+        "id": 398,
+        "logprob": -0.003030777,
+        "special": false,
+        "text": "um"
+      },
+      {
+        "id": 29907,
+        "logprob": -0.3774414,
+        "special": false,
+        "text": "C"
+      },
+      {
+        "id": 1446,
+        "logprob": -0.0003130436,
+        "special": false,
+        "text": "ats"
+      },
+      {
+        "id": 1115,
+        "logprob": -0.0021514893,
+        "special": false,
+        "text": "\":"
+      },
+      {
+        "id": 29906,
+        "logprob": -0.071899414,
+        "special": false,
+        "text": "2"
+      },
+      {
+        "id": 29913,
+        "logprob": -0.018997192,
+        "special": false,
+        "text": "}"
+      },
+      {
+        "id": 2,
+        "logprob": 0.0,
+        "special": true,
+        "text": "</s>"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "{\"firstName\":\"David\",\"hobby\":\"Trees\",\"lastName\":\"Holtz\",\"numCats\":2}"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_load.json b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_load.json
new file mode 100644
index 00000000..411f3947
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_load.json
@@ -0,0 +1,478 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1024,
+          "logprob": -10.578125,
+          "text": "name"
+        },
+        {
+          "id": 29901,
+          "logprob": -3.0332031,
+          "text": ":"
+        },
+        {
+          "id": 13260,
+          "logprob": -9.171875,
+          "text": "dav"
+        },
+        {
+          "id": 333,
+          "logprob": -0.04257202,
+          "text": "id"
+        },
+        {
+          "id": 29889,
+          "logprob": -2.4785156,
+          "text": "."
+        },
+        {
+          "id": 4876,
+          "logprob": -10.7890625,
+          "text": "email"
+        },
+        {
+          "id": 29901,
+          "logprob": -0.32495117,
+          "text": ":"
+        },
+        {
+          "id": 259,
+          "logprob": -9.4921875,
+          "text": " "
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 29896,
+          "logprob": -0.7709961,
+          "special": false,
+          "text": "1"
+        },
+        {
+          "id": 29906,
+          "logprob": -0.33740234,
+          "special": false,
+          "text": "2"
+        },
+        {
+          "id": 29941,
+          "logprob": -0.00995636,
+          "special": false,
+          "text": "3"
+        },
+        {
+          "id": 29946,
+          "logprob": -0.64208984,
+          "special": false,
+          "text": "4"
+        },
+        {
+          "id": 29945,
+          "logprob": -0.4970703,
+          "special": false,
+          "text": "5"
+        },
+        {
+          "id": 29953,
+          "logprob": -0.46533203,
+          "special": false,
+          "text": "6"
+        },
+        {
+          "id": 29992,
+          "logprob": -0.5336914,
+          "special": false,
+          "text": "@"
+        },
+        {
+          "id": 21980,
+          "logprob": -0.5361328,
+          "special": false,
+          "text": "gmail"
+        },
+        {
+          "id": 29889,
+          "logprob": -0.00088739395,
+          "special": false,
+          "text": "."
+        },
+        {
+          "id": 510,
+          "logprob": -0.0022735596,
+          "special": false,
+          "text": "com"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "123456@gmail.com"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1024,
+          "logprob": -10.578125,
+          "text": "name"
+        },
+        {
+          "id": 29901,
+          "logprob": -3.03125,
+          "text": ":"
+        },
+        {
+          "id": 13260,
+          "logprob": -9.171875,
+          "text": "dav"
+        },
+        {
+          "id": 333,
+          "logprob": -0.04244995,
+          "text": "id"
+        },
+        {
+          "id": 29889,
+          "logprob": -2.4863281,
+          "text": "."
+        },
+        {
+          "id": 4876,
+          "logprob": -10.7890625,
+          "text": "email"
+        },
+        {
+          "id": 29901,
+          "logprob": -0.32714844,
+          "text": ":"
+        },
+        {
+          "id": 259,
+          "logprob": -9.4921875,
+          "text": " "
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 29896,
+          "logprob": -0.7685547,
+          "special": false,
+          "text": "1"
+        },
+        {
+          "id": 29906,
+          "logprob": -0.33666992,
+          "special": false,
+          "text": "2"
+        },
+        {
+          "id": 29941,
+          "logprob": -0.01008606,
+          "special": false,
+          "text": "3"
+        },
+        {
+          "id": 29946,
+          "logprob": -0.64160156,
+          "special": false,
+          "text": "4"
+        },
+        {
+          "id": 29945,
+          "logprob": -0.5,
+          "special": false,
+          "text": "5"
+        },
+        {
+          "id": 29953,
+          "logprob": -0.46557617,
+          "special": false,
+          "text": "6"
+        },
+        {
+          "id": 29992,
+          "logprob": -0.5341797,
+          "special": false,
+          "text": "@"
+        },
+        {
+          "id": 21980,
+          "logprob": -0.5361328,
+          "special": false,
+          "text": "gmail"
+        },
+        {
+          "id": 29889,
+          "logprob": -0.00088739395,
+          "special": false,
+          "text": "."
+        },
+        {
+          "id": 510,
+          "logprob": -0.0022907257,
+          "special": false,
+          "text": "com"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "123456@gmail.com"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1024,
+          "logprob": -10.578125,
+          "text": "name"
+        },
+        {
+          "id": 29901,
+          "logprob": -3.0332031,
+          "text": ":"
+        },
+        {
+          "id": 13260,
+          "logprob": -9.171875,
+          "text": "dav"
+        },
+        {
+          "id": 333,
+          "logprob": -0.04257202,
+          "text": "id"
+        },
+        {
+          "id": 29889,
+          "logprob": -2.4785156,
+          "text": "."
+        },
+        {
+          "id": 4876,
+          "logprob": -10.7890625,
+          "text": "email"
+        },
+        {
+          "id": 29901,
+          "logprob": -0.32495117,
+          "text": ":"
+        },
+        {
+          "id": 259,
+          "logprob": -9.4921875,
+          "text": " "
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 29896,
+          "logprob": -0.7709961,
+          "special": false,
+          "text": "1"
+        },
+        {
+          "id": 29906,
+          "logprob": -0.33740234,
+          "special": false,
+          "text": "2"
+        },
+        {
+          "id": 29941,
+          "logprob": -0.00995636,
+          "special": false,
+          "text": "3"
+        },
+        {
+          "id": 29946,
+          "logprob": -0.64208984,
+          "special": false,
+          "text": "4"
+        },
+        {
+          "id": 29945,
+          "logprob": -0.4970703,
+          "special": false,
+          "text": "5"
+        },
+        {
+          "id": 29953,
+          "logprob": -0.46533203,
+          "special": false,
+          "text": "6"
+        },
+        {
+          "id": 29992,
+          "logprob": -0.5336914,
+          "special": false,
+          "text": "@"
+        },
+        {
+          "id": 21980,
+          "logprob": -0.5361328,
+          "special": false,
+          "text": "gmail"
+        },
+        {
+          "id": 29889,
+          "logprob": -0.00088739395,
+          "special": false,
+          "text": "."
+        },
+        {
+          "id": 510,
+          "logprob": -0.0022735596,
+          "special": false,
+          "text": "com"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "123456@gmail.com"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1024,
+          "logprob": -10.578125,
+          "text": "name"
+        },
+        {
+          "id": 29901,
+          "logprob": -3.0332031,
+          "text": ":"
+        },
+        {
+          "id": 13260,
+          "logprob": -9.171875,
+          "text": "dav"
+        },
+        {
+          "id": 333,
+          "logprob": -0.04257202,
+          "text": "id"
+        },
+        {
+          "id": 29889,
+          "logprob": -2.4785156,
+          "text": "."
+        },
+        {
+          "id": 4876,
+          "logprob": -10.7890625,
+          "text": "email"
+        },
+        {
+          "id": 29901,
+          "logprob": -0.32495117,
+          "text": ":"
+        },
+        {
+          "id": 259,
+          "logprob": -9.4921875,
+          "text": " "
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 29896,
+          "logprob": -0.7709961,
+          "special": false,
+          "text": "1"
+        },
+        {
+          "id": 29906,
+          "logprob": -0.33740234,
+          "special": false,
+          "text": "2"
+        },
+        {
+          "id": 29941,
+          "logprob": -0.00995636,
+          "special": false,
+          "text": "3"
+        },
+        {
+          "id": 29946,
+          "logprob": -0.64208984,
+          "special": false,
+          "text": "4"
+        },
+        {
+          "id": 29945,
+          "logprob": -0.4970703,
+          "special": false,
+          "text": "5"
+        },
+        {
+          "id": 29953,
+          "logprob": -0.46533203,
+          "special": false,
+          "text": "6"
+        },
+        {
+          "id": 29992,
+          "logprob": -0.5336914,
+          "special": false,
+          "text": "@"
+        },
+        {
+          "id": 21980,
+          "logprob": -0.5361328,
+          "special": false,
+          "text": "gmail"
+        },
+        {
+          "id": 29889,
+          "logprob": -0.00088739395,
+          "special": false,
+          "text": "."
+        },
+        {
+          "id": 510,
+          "logprob": -0.0022735596,
+          "special": false,
+          "text": "com"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "123456@gmail.com"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_regex.json b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_regex.json
new file mode 100644
index 00000000..1ba9ae1e
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_regex.json
@@ -0,0 +1,109 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 806,
+        "logprob": -11.890625,
+        "text": "Wh"
+      },
+      {
+        "id": 1446,
+        "logprob": -3.6699219,
+        "text": "ats"
+      },
+      {
+        "id": 2921,
+        "logprob": -7.8203125,
+        "text": "Go"
+      },
+      {
+        "id": 468,
+        "logprob": -8.0703125,
+        "text": "og"
+      },
+      {
+        "id": 793,
+        "logprob": -2.1875,
+        "text": "les"
+      },
+      {
+        "id": 16332,
+        "logprob": -9.7109375,
+        "text": "DNS"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 29946,
+        "logprob": -1.4765625,
+        "special": false,
+        "text": "4"
+      },
+      {
+        "id": 29906,
+        "logprob": -0.9199219,
+        "special": false,
+        "text": "2"
+      },
+      {
+        "id": 29889,
+        "logprob": 0.0,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 29896,
+        "logprob": -1.1367188,
+        "special": false,
+        "text": "1"
+      },
+      {
+        "id": 29889,
+        "logprob": -1.4648438,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 29896,
+        "logprob": -0.40722656,
+        "special": false,
+        "text": "1"
+      },
+      {
+        "id": 29889,
+        "logprob": -0.17419434,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 29896,
+        "logprob": -0.20251465,
+        "special": false,
+        "text": "1"
+      },
+      {
+        "id": 29900,
+        "logprob": -1.5527344,
+        "special": false,
+        "text": "0"
+      },
+      {
+        "id": 29896,
+        "logprob": -1.3710938,
+        "special": false,
+        "text": "1"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "42.1.1.101"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_single_load_instance.json b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_single_load_instance.json
new file mode 100644
index 00000000..7ffb17cb
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_grammar_llama/test_flash_llama_grammar_single_load_instance.json
@@ -0,0 +1,73 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 29896,
+        "logprob": -0.7685547,
+        "special": false,
+        "text": "1"
+      },
+      {
+        "id": 29906,
+        "logprob": -0.33666992,
+        "special": false,
+        "text": "2"
+      },
+      {
+        "id": 29941,
+        "logprob": -0.009979248,
+        "special": false,
+        "text": "3"
+      },
+      {
+        "id": 29946,
+        "logprob": -0.64208984,
+        "special": false,
+        "text": "4"
+      },
+      {
+        "id": 29945,
+        "logprob": -0.4970703,
+        "special": false,
+        "text": "5"
+      },
+      {
+        "id": 29953,
+        "logprob": -0.46533203,
+        "special": false,
+        "text": "6"
+      },
+      {
+        "id": 29992,
+        "logprob": -0.5336914,
+        "special": false,
+        "text": "@"
+      },
+      {
+        "id": 21980,
+        "logprob": -0.53759766,
+        "special": false,
+        "text": "gmail"
+      },
+      {
+        "id": 29889,
+        "logprob": -0.0008878708,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 510,
+        "logprob": -0.002275467,
+        "special": false,
+        "text": "com"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "123456@gmail.com"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json
index e4ffb83b..7797cc6c 100644
--- a/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json
+++ b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq.json
@@ -11,78 +11,79 @@
       },
       {
         "id": 4321,
-        "logprob": -9.59375,
+        "logprob": -9.7890625,
         "text": "Test"
       },
       {
         "id": 2009,
-        "logprob": -9.6640625,
+        "logprob": -9.625,
         "text": "request"
       }
     ],
     "seed": null,
     "tokens": [
-      {
-        "id": 29918,
-        "logprob": -2.3867188,
-        "special": false,
-        "text": "_"
-      },
-      {
-        "id": 5338,
-        "logprob": -2.8183594,
-        "special": false,
-        "text": "uri"
-      },
       {
         "id": 13,
-        "logprob": -1.6367188,
+        "logprob": -2.3359375,
         "special": false,
         "text": "\n"
       },
       {
         "id": 3057,
-        "logprob": -1.0527344,
+        "logprob": -1.8779297,
         "special": false,
         "text": "Test"
       },
       {
         "id": 2009,
-        "logprob": -0.6542969,
+        "logprob": -1.2744141,
         "special": false,
         "text": " request"
       },
-      {
-        "id": 29918,
-        "logprob": -0.056121826,
-        "special": false,
-        "text": "_"
-      },
-      {
-        "id": 5338,
-        "logprob": -0.01600647,
-        "special": false,
-        "text": "uri"
-      },
       {
         "id": 13,
-        "logprob": -0.87939453,
+        "logprob": -1.6933594,
         "special": false,
         "text": "\n"
       },
       {
         "id": 3057,
-        "logprob": -0.7529297,
+        "logprob": -1.4648438,
         "special": false,
         "text": "Test"
       },
       {
         "id": 2009,
-        "logprob": -0.2980957,
+        "logprob": -0.15600586,
         "special": false,
         "text": " request"
+      },
+      {
+        "id": 13,
+        "logprob": -0.8027344,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 3057,
+        "logprob": -0.23022461,
+        "special": false,
+        "text": "Test"
+      },
+      {
+        "id": 2009,
+        "logprob": -0.0069885254,
+        "special": false,
+        "text": " request"
+      },
+      {
+        "id": 13,
+        "logprob": -0.02218628,
+        "special": false,
+        "text": "\n"
       }
-    ]
+    ],
+    "top_tokens": null
   },
-  "generated_text": "_uri\nTest request_uri\nTest request"
+  "generated_text": "\nTest request\nTest request\nTest request\n"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json
index 02713a00..fa2fd4a2 100644
--- a/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_all_params.json
@@ -11,12 +11,12 @@
       },
       {
         "id": 4321,
-        "logprob": -9.6015625,
+        "logprob": -9.84375,
         "text": "Test"
       },
       {
         "id": 2009,
-        "logprob": -9.6640625,
+        "logprob": -9.6015625,
         "text": "request"
       }
     ],
@@ -24,13 +24,13 @@
     "tokens": [
       {
         "id": 29899,
-        "logprob": -1.1640625,
+        "logprob": -1.5625,
         "special": false,
         "text": "-"
       },
       {
         "id": 1454,
-        "logprob": -0.07543945,
+        "logprob": -0.20410156,
         "special": false,
         "text": "for"
       },
@@ -54,19 +54,19 @@
       },
       {
         "id": 396,
-        "logprob": -0.2956543,
+        "logprob": -0.27685547,
         "special": false,
         "text": " #"
       },
       {
         "id": 29906,
-        "logprob": -0.52734375,
+        "logprob": -0.4970703,
         "special": false,
         "text": "2"
       },
       {
         "id": 29900,
-        "logprob": -0.6899414,
+        "logprob": -0.80615234,
         "special": false,
         "text": "0"
       },
@@ -77,12 +77,13 @@
         "text": "1"
       },
       {
-        "id": 29946,
-        "logprob": -1.5068359,
+        "id": 29955,
+        "logprob": -1.0751953,
         "special": false,
-        "text": "4"
+        "text": "7"
       }
-    ]
+    ],
+    "top_tokens": null
   },
-  "generated_text": "Test request-for-comment: #2014"
+  "generated_text": "Test request-for-comment: #2017"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json
index 88bfa4f9..594b7351 100644
--- a/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_llama_gptq/test_flash_llama_gptq_load.json
@@ -12,80 +12,81 @@
         },
         {
           "id": 4321,
-          "logprob": -9.6015625,
+          "logprob": -9.828125,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -9.671875,
+          "logprob": -9.609375,
           "text": "request"
         }
       ],
       "seed": null,
       "tokens": [
-        {
-          "id": 29918,
-          "logprob": -2.3828125,
-          "special": false,
-          "text": "_"
-        },
-        {
-          "id": 5338,
-          "logprob": -2.8105469,
-          "special": false,
-          "text": "uri"
-        },
         {
           "id": 13,
-          "logprob": -1.6396484,
+          "logprob": -2.3300781,
           "special": false,
           "text": "\n"
         },
         {
           "id": 3057,
-          "logprob": -1.0546875,
+          "logprob": -1.8740234,
           "special": false,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -0.6513672,
+          "logprob": -1.2646484,
           "special": false,
           "text": " request"
         },
-        {
-          "id": 29918,
-          "logprob": -0.056365967,
-          "special": false,
-          "text": "_"
-        },
-        {
-          "id": 5338,
-          "logprob": -0.016082764,
-          "special": false,
-          "text": "uri"
-        },
         {
           "id": 13,
-          "logprob": -0.87841797,
+          "logprob": -1.7158203,
           "special": false,
           "text": "\n"
         },
         {
           "id": 3057,
-          "logprob": -0.7548828,
+          "logprob": -1.4667969,
           "special": false,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -0.29711914,
+          "logprob": -0.15344238,
           "special": false,
           "text": " request"
+        },
+        {
+          "id": 13,
+          "logprob": -0.81591797,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 3057,
+          "logprob": -0.22973633,
+          "special": false,
+          "text": "Test"
+        },
+        {
+          "id": 2009,
+          "logprob": -0.007045746,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 13,
+          "logprob": -0.021957397,
+          "special": false,
+          "text": "\n"
         }
-      ]
+      ],
+      "top_tokens": null
     },
-    "generated_text": "_uri\nTest request_uri\nTest request"
+    "generated_text": "\nTest request\nTest request\nTest request\n"
   },
   {
     "details": {
@@ -100,80 +101,81 @@
         },
         {
           "id": 4321,
-          "logprob": -9.6015625,
+          "logprob": -9.84375,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -9.6640625,
+          "logprob": -9.59375,
           "text": "request"
         }
       ],
       "seed": null,
       "tokens": [
-        {
-          "id": 29918,
-          "logprob": -2.3828125,
-          "special": false,
-          "text": "_"
-        },
-        {
-          "id": 5338,
-          "logprob": -2.828125,
-          "special": false,
-          "text": "uri"
-        },
         {
           "id": 13,
-          "logprob": -1.6386719,
+          "logprob": -2.3378906,
           "special": false,
           "text": "\n"
         },
         {
           "id": 3057,
-          "logprob": -1.0527344,
+          "logprob": -1.8779297,
           "special": false,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -0.6542969,
+          "logprob": -1.2636719,
           "special": false,
           "text": " request"
         },
-        {
-          "id": 29918,
-          "logprob": -0.055877686,
-          "special": false,
-          "text": "_"
-        },
-        {
-          "id": 5338,
-          "logprob": -0.016021729,
-          "special": false,
-          "text": "uri"
-        },
         {
           "id": 13,
-          "logprob": -0.8769531,
+          "logprob": -1.6992188,
           "special": false,
           "text": "\n"
         },
         {
           "id": 3057,
-          "logprob": -0.7583008,
+          "logprob": -1.4589844,
           "special": false,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -0.29833984,
+          "logprob": -0.15344238,
           "special": false,
           "text": " request"
+        },
+        {
+          "id": 13,
+          "logprob": -0.79052734,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 3057,
+          "logprob": -0.22937012,
+          "special": false,
+          "text": "Test"
+        },
+        {
+          "id": 2009,
+          "logprob": -0.007041931,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 13,
+          "logprob": -0.022140503,
+          "special": false,
+          "text": "\n"
         }
-      ]
+      ],
+      "top_tokens": null
     },
-    "generated_text": "_uri\nTest request_uri\nTest request"
+    "generated_text": "\nTest request\nTest request\nTest request\n"
   },
   {
     "details": {
@@ -188,80 +190,81 @@
         },
         {
           "id": 4321,
-          "logprob": -9.6015625,
+          "logprob": -9.84375,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -9.671875,
+          "logprob": -9.609375,
           "text": "request"
         }
       ],
       "seed": null,
       "tokens": [
-        {
-          "id": 29918,
-          "logprob": -2.3847656,
-          "special": false,
-          "text": "_"
-        },
-        {
-          "id": 5338,
-          "logprob": -2.8144531,
-          "special": false,
-          "text": "uri"
-        },
         {
           "id": 13,
-          "logprob": -1.6396484,
+          "logprob": -2.3261719,
           "special": false,
           "text": "\n"
         },
         {
           "id": 3057,
-          "logprob": -1.0527344,
+          "logprob": -1.8730469,
           "special": false,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -0.65478516,
+          "logprob": -1.2587891,
           "special": false,
           "text": " request"
         },
-        {
-          "id": 29918,
-          "logprob": -0.056243896,
-          "special": false,
-          "text": "_"
-        },
-        {
-          "id": 5338,
-          "logprob": -0.016143799,
-          "special": false,
-          "text": "uri"
-        },
         {
           "id": 13,
-          "logprob": -0.8808594,
+          "logprob": -1.6894531,
           "special": false,
           "text": "\n"
         },
         {
           "id": 3057,
-          "logprob": -0.75341797,
+          "logprob": -1.46875,
           "special": false,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -0.2956543,
+          "logprob": -0.1541748,
           "special": false,
           "text": " request"
+        },
+        {
+          "id": 13,
+          "logprob": -0.80322266,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 3057,
+          "logprob": -0.22912598,
+          "special": false,
+          "text": "Test"
+        },
+        {
+          "id": 2009,
+          "logprob": -0.0070495605,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 13,
+          "logprob": -0.021606445,
+          "special": false,
+          "text": "\n"
         }
-      ]
+      ],
+      "top_tokens": null
     },
-    "generated_text": "_uri\nTest request_uri\nTest request"
+    "generated_text": "\nTest request\nTest request\nTest request\n"
   },
   {
     "details": {
@@ -276,79 +279,80 @@
         },
         {
           "id": 4321,
-          "logprob": -9.6015625,
+          "logprob": -9.84375,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -9.6640625,
+          "logprob": -9.6015625,
           "text": "request"
         }
       ],
       "seed": null,
       "tokens": [
-        {
-          "id": 29918,
-          "logprob": -2.3769531,
-          "special": false,
-          "text": "_"
-        },
-        {
-          "id": 5338,
-          "logprob": -2.8183594,
-          "special": false,
-          "text": "uri"
-        },
         {
           "id": 13,
-          "logprob": -1.6396484,
+          "logprob": -2.3320312,
           "special": false,
           "text": "\n"
         },
         {
           "id": 3057,
-          "logprob": -1.0546875,
+          "logprob": -1.875,
           "special": false,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -0.65478516,
+          "logprob": -1.2646484,
           "special": false,
           "text": " request"
         },
-        {
-          "id": 29918,
-          "logprob": -0.05557251,
-          "special": false,
-          "text": "_"
-        },
-        {
-          "id": 5338,
-          "logprob": -0.01612854,
-          "special": false,
-          "text": "uri"
-        },
         {
           "id": 13,
-          "logprob": -0.8730469,
+          "logprob": -1.6884766,
           "special": false,
           "text": "\n"
         },
         {
           "id": 3057,
-          "logprob": -0.7519531,
+          "logprob": -1.4589844,
           "special": false,
           "text": "Test"
         },
         {
           "id": 2009,
-          "logprob": -0.29785156,
+          "logprob": -0.15185547,
           "special": false,
           "text": " request"
+        },
+        {
+          "id": 13,
+          "logprob": -0.79833984,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 3057,
+          "logprob": -0.22827148,
+          "special": false,
+          "text": "Test"
+        },
+        {
+          "id": 2009,
+          "logprob": -0.006996155,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 13,
+          "logprob": -0.021560669,
+          "special": false,
+          "text": "\n"
         }
-      ]
+      ],
+      "top_tokens": null
     },
-    "generated_text": "_uri\nTest request_uri\nTest request"
+    "generated_text": "\nTest request\nTest request\nTest request\n"
   }
 ]
diff --git a/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_all_params.json b/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_all_params.json
new file mode 100644
index 00000000..d8a298eb
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_all_params.json
@@ -0,0 +1,98 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 338,
+        "logprob": -10.0078125,
+        "text": "is"
+      },
+      {
+        "id": 21784,
+        "logprob": -15.515625,
+        "text": "Deep"
+      },
+      {
+        "id": 29257,
+        "logprob": -2.8847656,
+        "text": "Learning"
+      },
+      {
+        "id": 29973,
+        "logprob": -4.140625,
+        "text": "?"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 13,
+        "logprob": -1.1582031,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 2772,
+        "logprob": -0.23083496,
+        "special": false,
+        "text": "De"
+      },
+      {
+        "id": 1022,
+        "logprob": 0.0,
+        "special": false,
+        "text": "ep"
+      },
+      {
+        "id": 6509,
+        "logprob": 0.0,
+        "special": false,
+        "text": " learning"
+      },
+      {
+        "id": 29892,
+        "logprob": -0.61816406,
+        "special": false,
+        "text": ","
+      },
+      {
+        "id": 607,
+        "logprob": -0.7089844,
+        "special": false,
+        "text": " which"
+      },
+      {
+        "id": 508,
+        "logprob": -1.7724609,
+        "special": false,
+        "text": " can"
+      },
+      {
+        "id": 367,
+        "logprob": 0.0,
+        "special": false,
+        "text": " be"
+      },
+      {
+        "id": 5545,
+        "logprob": 0.0,
+        "special": false,
+        "text": " considered"
+      },
+      {
+        "id": 408,
+        "logprob": -0.3869629,
+        "special": false,
+        "text": " as"
+      }
+    ]
+  },
+  "generated_text": "What is Deep Learning?\nDeep learning, which can be considered as"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_load.json b/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_load.json
new file mode 100644
index 00000000..413af1d7
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_load.json
@@ -0,0 +1,414 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1724,
+          "logprob": -10.734375,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -1.5488281,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.2890625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.2753906,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.48046875,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -1.1845703,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2772,
+          "logprob": -0.5727539,
+          "special": false,
+          "text": "De"
+        },
+        {
+          "id": 1022,
+          "logprob": -0.00010967255,
+          "special": false,
+          "text": "ep"
+        },
+        {
+          "id": 6509,
+          "logprob": -0.1239624,
+          "special": false,
+          "text": " learning"
+        },
+        {
+          "id": 338,
+          "logprob": -0.04510498,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 263,
+          "logprob": -0.018295288,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 11306,
+          "logprob": -0.45922852,
+          "special": false,
+          "text": " subset"
+        },
+        {
+          "id": 310,
+          "logprob": -0.00020992756,
+          "special": false,
+          "text": " of"
+        },
+        {
+          "id": 4933,
+          "logprob": -0.0046539307,
+          "special": false,
+          "text": " machine"
+        },
+        {
+          "id": 6509,
+          "logprob": -0.00025844574,
+          "special": false,
+          "text": " learning"
+        }
+      ]
+    },
+    "generated_text": "\nDeep learning is a subset of machine learning"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1724,
+          "logprob": -10.734375,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -1.5488281,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.2890625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.2724609,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.47729492,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -1.1826172,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2772,
+          "logprob": -0.56689453,
+          "special": false,
+          "text": "De"
+        },
+        {
+          "id": 1022,
+          "logprob": -0.000108003616,
+          "special": false,
+          "text": "ep"
+        },
+        {
+          "id": 6509,
+          "logprob": -0.1239624,
+          "special": false,
+          "text": " learning"
+        },
+        {
+          "id": 338,
+          "logprob": -0.044433594,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 263,
+          "logprob": -0.018295288,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 11306,
+          "logprob": -0.45922852,
+          "special": false,
+          "text": " subset"
+        },
+        {
+          "id": 310,
+          "logprob": -0.0002104044,
+          "special": false,
+          "text": " of"
+        },
+        {
+          "id": 4933,
+          "logprob": -0.004711151,
+          "special": false,
+          "text": " machine"
+        },
+        {
+          "id": 6509,
+          "logprob": -0.00025892258,
+          "special": false,
+          "text": " learning"
+        }
+      ]
+    },
+    "generated_text": "\nDeep learning is a subset of machine learning"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1724,
+          "logprob": -10.734375,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -1.5488281,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.2890625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.2724609,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.47729492,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -1.1826172,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2772,
+          "logprob": -0.56689453,
+          "special": false,
+          "text": "De"
+        },
+        {
+          "id": 1022,
+          "logprob": -0.000108003616,
+          "special": false,
+          "text": "ep"
+        },
+        {
+          "id": 6509,
+          "logprob": -0.1239624,
+          "special": false,
+          "text": " learning"
+        },
+        {
+          "id": 338,
+          "logprob": -0.044433594,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 263,
+          "logprob": -0.018295288,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 11306,
+          "logprob": -0.45922852,
+          "special": false,
+          "text": " subset"
+        },
+        {
+          "id": 310,
+          "logprob": -0.0002104044,
+          "special": false,
+          "text": " of"
+        },
+        {
+          "id": 4933,
+          "logprob": -0.004711151,
+          "special": false,
+          "text": " machine"
+        },
+        {
+          "id": 6509,
+          "logprob": -0.00025892258,
+          "special": false,
+          "text": " learning"
+        }
+      ]
+    },
+    "generated_text": "\nDeep learning is a subset of machine learning"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1724,
+          "logprob": -10.734375,
+          "text": "What"
+        },
+        {
+          "id": 338,
+          "logprob": -1.5488281,
+          "text": "is"
+        },
+        {
+          "id": 21784,
+          "logprob": -9.2890625,
+          "text": "Deep"
+        },
+        {
+          "id": 29257,
+          "logprob": -1.2724609,
+          "text": "Learning"
+        },
+        {
+          "id": 29973,
+          "logprob": -0.47729492,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -1.1826172,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2772,
+          "logprob": -0.56689453,
+          "special": false,
+          "text": "De"
+        },
+        {
+          "id": 1022,
+          "logprob": -0.000108003616,
+          "special": false,
+          "text": "ep"
+        },
+        {
+          "id": 6509,
+          "logprob": -0.1239624,
+          "special": false,
+          "text": " learning"
+        },
+        {
+          "id": 338,
+          "logprob": -0.044433594,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 263,
+          "logprob": -0.018295288,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 11306,
+          "logprob": -0.45922852,
+          "special": false,
+          "text": " subset"
+        },
+        {
+          "id": 310,
+          "logprob": -0.0002104044,
+          "special": false,
+          "text": " of"
+        },
+        {
+          "id": 4933,
+          "logprob": -0.004711151,
+          "special": false,
+          "text": " machine"
+        },
+        {
+          "id": 6509,
+          "logprob": -0.00025892258,
+          "special": false,
+          "text": " learning"
+        }
+      ]
+    },
+    "generated_text": "\nDeep learning is a subset of machine learning"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_simple.json b/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_simple.json
new file mode 100644
index 00000000..15754b14
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_medusa/test_flash_medusa_simple.json
@@ -0,0 +1,103 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 1724,
+        "logprob": -10.734375,
+        "text": "What"
+      },
+      {
+        "id": 338,
+        "logprob": -1.5488281,
+        "text": "is"
+      },
+      {
+        "id": 21784,
+        "logprob": -9.2890625,
+        "text": "Deep"
+      },
+      {
+        "id": 29257,
+        "logprob": -1.2753906,
+        "text": "Learning"
+      },
+      {
+        "id": 29973,
+        "logprob": -0.48046875,
+        "text": "?"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 13,
+        "logprob": -1.1845703,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 2772,
+        "logprob": -0.5727539,
+        "special": false,
+        "text": "De"
+      },
+      {
+        "id": 1022,
+        "logprob": -0.000108122826,
+        "special": false,
+        "text": "ep"
+      },
+      {
+        "id": 6509,
+        "logprob": -0.1239624,
+        "special": false,
+        "text": " learning"
+      },
+      {
+        "id": 338,
+        "logprob": -0.044433594,
+        "special": false,
+        "text": " is"
+      },
+      {
+        "id": 263,
+        "logprob": -0.01852417,
+        "special": false,
+        "text": " a"
+      },
+      {
+        "id": 11306,
+        "logprob": -0.45922852,
+        "special": false,
+        "text": " subset"
+      },
+      {
+        "id": 310,
+        "logprob": -0.0002104044,
+        "special": false,
+        "text": " of"
+      },
+      {
+        "id": 4933,
+        "logprob": -0.004787445,
+        "special": false,
+        "text": " machine"
+      },
+      {
+        "id": 6509,
+        "logprob": -0.00026226044,
+        "special": false,
+        "text": " learning"
+      }
+    ]
+  },
+  "generated_text": "\nDeep learning is a subset of machine learning"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi.json b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi.json
new file mode 100644
index 00000000..51d969b2
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi.json
@@ -0,0 +1,84 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 14402,
+        "logprob": null,
+        "text": "Test"
+      },
+      {
+        "id": 2581,
+        "logprob": -11.6171875,
+        "text": " request"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 25,
+        "logprob": -2.3203125,
+        "special": false,
+        "text": ":"
+      },
+      {
+        "id": 1391,
+        "logprob": -0.98779297,
+        "special": false,
+        "text": " {"
+      },
+      {
+        "id": 25927,
+        "logprob": -0.76660156,
+        "special": false,
+        "text": "request"
+      },
+      {
+        "id": 92,
+        "logprob": -0.7246094,
+        "special": false,
+        "text": "}"
+      },
+      {
+        "id": 4943,
+        "logprob": -0.41333008,
+        "special": false,
+        "text": "\")"
+      },
+      {
+        "id": 198,
+        "logprob": -0.11785889,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 50280,
+        "logprob": -0.97265625,
+        "special": false,
+        "text": "        "
+      },
+      {
+        "id": 26209,
+        "logprob": -1.4414062,
+        "special": false,
+        "text": "response"
+      },
+      {
+        "id": 796,
+        "logprob": -0.0569458,
+        "special": false,
+        "text": " ="
+      },
+      {
+        "id": 2116,
+        "logprob": -1.1533203,
+        "special": false,
+        "text": " self"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": ": {request}\")\n        response = self"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json
new file mode 100644
index 00000000..221ff13d
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json
@@ -0,0 +1,60 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "stop_sequence",
+    "generated_tokens": 6,
+    "prefill": [
+      {
+        "id": 14402,
+        "logprob": null,
+        "text": "Test"
+      },
+      {
+        "id": 2581,
+        "logprob": -11.6171875,
+        "text": " request"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 284,
+        "logprob": -0.19421387,
+        "special": false,
+        "text": " to"
+      },
+      {
+        "id": 3758,
+        "logprob": -0.62597656,
+        "special": false,
+        "text": " send"
+      },
+      {
+        "id": 1366,
+        "logprob": -0.87060547,
+        "special": false,
+        "text": " data"
+      },
+      {
+        "id": 625,
+        "logprob": -0.88427734,
+        "special": false,
+        "text": " over"
+      },
+      {
+        "id": 257,
+        "logprob": -1.0830078,
+        "special": false,
+        "text": " a"
+      },
+      {
+        "id": 3127,
+        "logprob": -1.9462891,
+        "special": false,
+        "text": " network"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "Test request to send data over a network"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_load.json b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_load.json
new file mode 100644
index 00000000..62f7fd32
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_load.json
@@ -0,0 +1,338 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 14402,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 2581,
+          "logprob": -11.6171875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 25,
+          "logprob": -2.3203125,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 1391,
+          "logprob": -0.98779297,
+          "special": false,
+          "text": " {"
+        },
+        {
+          "id": 25927,
+          "logprob": -0.7729492,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 92,
+          "logprob": -0.7241211,
+          "special": false,
+          "text": "}"
+        },
+        {
+          "id": 4943,
+          "logprob": -0.4091797,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 198,
+          "logprob": -0.119018555,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50280,
+          "logprob": -0.9707031,
+          "special": false,
+          "text": "        "
+        },
+        {
+          "id": 26209,
+          "logprob": -1.4414062,
+          "special": false,
+          "text": "response"
+        },
+        {
+          "id": 796,
+          "logprob": -0.056854248,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 2116,
+          "logprob": -1.1533203,
+          "special": false,
+          "text": " self"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ": {request}\")\n        response = self"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 14402,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 2581,
+          "logprob": -11.6171875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 25,
+          "logprob": -2.3203125,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 1391,
+          "logprob": -0.98779297,
+          "special": false,
+          "text": " {"
+        },
+        {
+          "id": 25927,
+          "logprob": -0.7729492,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 92,
+          "logprob": -0.7241211,
+          "special": false,
+          "text": "}"
+        },
+        {
+          "id": 4943,
+          "logprob": -0.4091797,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 198,
+          "logprob": -0.119018555,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50280,
+          "logprob": -0.9707031,
+          "special": false,
+          "text": "        "
+        },
+        {
+          "id": 26209,
+          "logprob": -1.4414062,
+          "special": false,
+          "text": "response"
+        },
+        {
+          "id": 796,
+          "logprob": -0.056854248,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 2116,
+          "logprob": -1.1533203,
+          "special": false,
+          "text": " self"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ": {request}\")\n        response = self"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 14402,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 2581,
+          "logprob": -11.6171875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 25,
+          "logprob": -2.3203125,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 1391,
+          "logprob": -0.98779297,
+          "special": false,
+          "text": " {"
+        },
+        {
+          "id": 25927,
+          "logprob": -0.7729492,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 92,
+          "logprob": -0.7241211,
+          "special": false,
+          "text": "}"
+        },
+        {
+          "id": 4943,
+          "logprob": -0.4091797,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 198,
+          "logprob": -0.119018555,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50280,
+          "logprob": -0.9707031,
+          "special": false,
+          "text": "        "
+        },
+        {
+          "id": 26209,
+          "logprob": -1.4414062,
+          "special": false,
+          "text": "response"
+        },
+        {
+          "id": 796,
+          "logprob": -0.056854248,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 2116,
+          "logprob": -1.1533203,
+          "special": false,
+          "text": " self"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ": {request}\")\n        response = self"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 14402,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 2581,
+          "logprob": -11.6171875,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 25,
+          "logprob": -2.3203125,
+          "special": false,
+          "text": ":"
+        },
+        {
+          "id": 1391,
+          "logprob": -0.98779297,
+          "special": false,
+          "text": " {"
+        },
+        {
+          "id": 25927,
+          "logprob": -0.7729492,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 92,
+          "logprob": -0.7241211,
+          "special": false,
+          "text": "}"
+        },
+        {
+          "id": 4943,
+          "logprob": -0.4091797,
+          "special": false,
+          "text": "\")"
+        },
+        {
+          "id": 198,
+          "logprob": -0.119018555,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 50280,
+          "logprob": -0.9707031,
+          "special": false,
+          "text": "        "
+        },
+        {
+          "id": 26209,
+          "logprob": -1.4414062,
+          "special": false,
+          "text": "response"
+        },
+        {
+          "id": 796,
+          "logprob": -0.056854248,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 2116,
+          "logprob": -1.1533203,
+          "special": false,
+          "text": " self"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": ": {request}\")\n        response = self"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json
new file mode 100644
index 00000000..7219f9e6
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json
@@ -0,0 +1,84 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 2271,
+        "logprob": null,
+        "text": "Test"
+      },
+      {
+        "id": 1681,
+        "logprob": -8.8515625,
+        "text": " request"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 198,
+        "logprob": -2.9023438,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 2,
+        "logprob": -2.9160156,
+        "special": false,
+        "text": "#"
+      },
+      {
+        "id": 4230,
+        "logprob": -3.1035156,
+        "special": false,
+        "text": " Create"
+      },
+      {
+        "id": 264,
+        "logprob": -1.1025391,
+        "special": false,
+        "text": " a"
+      },
+      {
+        "id": 1681,
+        "logprob": -1.6914062,
+        "special": false,
+        "text": " request"
+      },
+      {
+        "id": 198,
+        "logprob": -1.1953125,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 2035,
+        "logprob": -1.3203125,
+        "special": false,
+        "text": "request"
+      },
+      {
+        "id": 284,
+        "logprob": -0.13537598,
+        "special": false,
+        "text": " ="
+      },
+      {
+        "id": 7388,
+        "logprob": -1.2402344,
+        "special": false,
+        "text": " requests"
+      },
+      {
+        "id": 670,
+        "logprob": -0.2775879,
+        "special": false,
+        "text": ".get"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "\n# Create a request\nrequest = requests.get"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json
new file mode 100644
index 00000000..4a2936af
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json
@@ -0,0 +1,84 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 2271,
+        "logprob": null,
+        "text": "Test"
+      },
+      {
+        "id": 1681,
+        "logprob": -8.8515625,
+        "text": " request"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 311,
+        "logprob": -1.4277344,
+        "special": false,
+        "text": " to"
+      },
+      {
+        "id": 279,
+        "logprob": -0.65478516,
+        "special": false,
+        "text": " the"
+      },
+      {
+        "id": 2473,
+        "logprob": -1.8300781,
+        "special": false,
+        "text": " service"
+      },
+      {
+        "id": 382,
+        "logprob": -0.75,
+        "special": false,
+        "text": ".\n\n"
+      },
+      {
+        "id": 286,
+        "logprob": -0.11621094,
+        "special": false,
+        "text": "       "
+      },
+      {
+        "id": 549,
+        "logprob": 0.0,
+        "special": false,
+        "text": " :"
+      },
+      {
+        "id": 689,
+        "logprob": -0.48608398,
+        "special": false,
+        "text": "return"
+      },
+      {
+        "id": 25,
+        "logprob": 0.0,
+        "special": false,
+        "text": ":"
+      },
+      {
+        "id": 5949,
+        "logprob": -0.5756836,
+        "special": false,
+        "text": " Response"
+      },
+      {
+        "id": 504,
+        "logprob": -0.24499512,
+        "special": false,
+        "text": " from"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "Test request to the service.\n\n        :return: Response from"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json
new file mode 100644
index 00000000..4786ff24
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json
@@ -0,0 +1,338 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 2271,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 1681,
+          "logprob": -8.8515625,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 198,
+          "logprob": -2.9023438,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2,
+          "logprob": -2.9140625,
+          "special": false,
+          "text": "#"
+        },
+        {
+          "id": 4230,
+          "logprob": -3.1054688,
+          "special": false,
+          "text": " Create"
+        },
+        {
+          "id": 264,
+          "logprob": -1.0966797,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 1681,
+          "logprob": -1.6914062,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 198,
+          "logprob": -1.1923828,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2035,
+          "logprob": -1.3193359,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 284,
+          "logprob": -0.13586426,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 7388,
+          "logprob": -1.2412109,
+          "special": false,
+          "text": " requests"
+        },
+        {
+          "id": 670,
+          "logprob": -0.2775879,
+          "special": false,
+          "text": ".get"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n# Create a request\nrequest = requests.get"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 2271,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 1681,
+          "logprob": -8.8515625,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 198,
+          "logprob": -2.9023438,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2,
+          "logprob": -2.9140625,
+          "special": false,
+          "text": "#"
+        },
+        {
+          "id": 4230,
+          "logprob": -3.1054688,
+          "special": false,
+          "text": " Create"
+        },
+        {
+          "id": 264,
+          "logprob": -1.0966797,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 1681,
+          "logprob": -1.6914062,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 198,
+          "logprob": -1.1923828,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2035,
+          "logprob": -1.3193359,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 284,
+          "logprob": -0.13586426,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 7388,
+          "logprob": -1.2412109,
+          "special": false,
+          "text": " requests"
+        },
+        {
+          "id": 670,
+          "logprob": -0.2775879,
+          "special": false,
+          "text": ".get"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n# Create a request\nrequest = requests.get"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 2271,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 1681,
+          "logprob": -8.8515625,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 198,
+          "logprob": -2.9023438,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2,
+          "logprob": -2.9140625,
+          "special": false,
+          "text": "#"
+        },
+        {
+          "id": 4230,
+          "logprob": -3.1054688,
+          "special": false,
+          "text": " Create"
+        },
+        {
+          "id": 264,
+          "logprob": -1.0966797,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 1681,
+          "logprob": -1.6914062,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 198,
+          "logprob": -1.1923828,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2035,
+          "logprob": -1.3193359,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 284,
+          "logprob": -0.13586426,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 7388,
+          "logprob": -1.2412109,
+          "special": false,
+          "text": " requests"
+        },
+        {
+          "id": 670,
+          "logprob": -0.2775879,
+          "special": false,
+          "text": ".get"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n# Create a request\nrequest = requests.get"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 2271,
+          "logprob": null,
+          "text": "Test"
+        },
+        {
+          "id": 1681,
+          "logprob": -8.8515625,
+          "text": " request"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 198,
+          "logprob": -2.9023438,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2,
+          "logprob": -2.9140625,
+          "special": false,
+          "text": "#"
+        },
+        {
+          "id": 4230,
+          "logprob": -3.1054688,
+          "special": false,
+          "text": " Create"
+        },
+        {
+          "id": 264,
+          "logprob": -1.0966797,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 1681,
+          "logprob": -1.6914062,
+          "special": false,
+          "text": " request"
+        },
+        {
+          "id": 198,
+          "logprob": -1.1923828,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 2035,
+          "logprob": -1.3193359,
+          "special": false,
+          "text": "request"
+        },
+        {
+          "id": 284,
+          "logprob": -0.13586426,
+          "special": false,
+          "text": " ="
+        },
+        {
+          "id": 7388,
+          "logprob": -1.2412109,
+          "special": false,
+          "text": " requests"
+        },
+        {
+          "id": 670,
+          "logprob": -0.2775879,
+          "special": false,
+          "text": ".get"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n# Create a request\nrequest = requests.get"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2.json b/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2.json
new file mode 100644
index 00000000..36a2ff4d
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2.json
@@ -0,0 +1,94 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 610,
+        "logprob": null,
+        "text": "def"
+      },
+      {
+        "id": 1489,
+        "logprob": -5.2617188,
+        "text": " print"
+      },
+      {
+        "id": 100,
+        "logprob": -0.38476562,
+        "text": "_"
+      },
+      {
+        "id": 7670,
+        "logprob": -7.640625,
+        "text": "hello"
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 2284,
+        "logprob": -0.92626953,
+        "special": false,
+        "text": "():"
+      },
+      {
+        "id": 303,
+        "logprob": -0.40844727,
+        "special": false,
+        "text": "\n   "
+      },
+      {
+        "id": 1489,
+        "logprob": -0.27905273,
+        "special": false,
+        "text": " print"
+      },
+      {
+        "id": 459,
+        "logprob": -0.6118164,
+        "special": false,
+        "text": "(\""
+      },
+      {
+        "id": 8302,
+        "logprob": -0.68652344,
+        "special": false,
+        "text": "Hello"
+      },
+      {
+        "id": 10914,
+        "logprob": -1.4619141,
+        "special": false,
+        "text": " World"
+      },
+      {
+        "id": 16013,
+        "logprob": -0.7993164,
+        "special": false,
+        "text": "!\")"
+      },
+      {
+        "id": 222,
+        "logprob": -0.63134766,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 222,
+        "logprob": -0.23278809,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 610,
+        "logprob": -1.2294922,
+        "special": false,
+        "text": "def"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "():\n    print(\"Hello World!\")\n\ndef"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json b/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json
new file mode 100644
index 00000000..38117272
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_default_params.json
@@ -0,0 +1,394 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 60,
+    "prefill": [
+      {
+        "id": 610,
+        "logprob": null,
+        "text": "def"
+      },
+      {
+        "id": 1489,
+        "logprob": -5.2617188,
+        "text": " print"
+      },
+      {
+        "id": 100,
+        "logprob": -0.38476562,
+        "text": "_"
+      },
+      {
+        "id": 7670,
+        "logprob": -7.640625,
+        "text": "hello"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 2284,
+        "logprob": -0.296875,
+        "special": false,
+        "text": "():"
+      },
+      {
+        "id": 303,
+        "logprob": 0.0,
+        "special": false,
+        "text": "\n   "
+      },
+      {
+        "id": 1489,
+        "logprob": 0.0,
+        "special": false,
+        "text": " print"
+      },
+      {
+        "id": 459,
+        "logprob": 0.0,
+        "special": false,
+        "text": "(\""
+      },
+      {
+        "id": 8302,
+        "logprob": -0.28125,
+        "special": false,
+        "text": "Hello"
+      },
+      {
+        "id": 10914,
+        "logprob": -0.79248047,
+        "special": false,
+        "text": " World"
+      },
+      {
+        "id": 16013,
+        "logprob": -0.61816406,
+        "special": false,
+        "text": "!\")"
+      },
+      {
+        "id": 222,
+        "logprob": -0.0619812,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 222,
+        "logprob": 0.0,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 610,
+        "logprob": -0.4091797,
+        "special": false,
+        "text": "def"
+      },
+      {
+        "id": 1489,
+        "logprob": 0.0,
+        "special": false,
+        "text": " print"
+      },
+      {
+        "id": 100,
+        "logprob": 0.0,
+        "special": false,
+        "text": "_"
+      },
+      {
+        "id": 7670,
+        "logprob": 0.0,
+        "special": false,
+        "text": "hello"
+      },
+      {
+        "id": 100,
+        "logprob": 0.0,
+        "special": false,
+        "text": "_"
+      },
+      {
+        "id": 444,
+        "logprob": -0.21655273,
+        "special": false,
+        "text": "name"
+      },
+      {
+        "id": 45,
+        "logprob": 0.0,
+        "special": false,
+        "text": "("
+      },
+      {
+        "id": 444,
+        "logprob": 0.0,
+        "special": false,
+        "text": "name"
+      },
+      {
+        "id": 731,
+        "logprob": 0.0,
+        "special": false,
+        "text": "):"
+      },
+      {
+        "id": 303,
+        "logprob": 0.0,
+        "special": false,
+        "text": "\n   "
+      },
+      {
+        "id": 1489,
+        "logprob": 0.0,
+        "special": false,
+        "text": " print"
+      },
+      {
+        "id": 459,
+        "logprob": 0.0,
+        "special": false,
+        "text": "(\""
+      },
+      {
+        "id": 8302,
+        "logprob": 0.0,
+        "special": false,
+        "text": "Hello"
+      },
+      {
+        "id": 332,
+        "logprob": -0.034698486,
+        "special": false,
+        "text": " \""
+      },
+      {
+        "id": 494,
+        "logprob": 0.0,
+        "special": false,
+        "text": " +"
+      },
+      {
+        "id": 655,
+        "logprob": 0.0,
+        "special": false,
+        "text": " name"
+      },
+      {
+        "id": 494,
+        "logprob": -0.20141602,
+        "special": false,
+        "text": " +"
+      },
+      {
+        "id": 332,
+        "logprob": 0.0,
+        "special": false,
+        "text": " \""
+      },
+      {
+        "id": 16013,
+        "logprob": 0.0,
+        "special": false,
+        "text": "!\")"
+      },
+      {
+        "id": 222,
+        "logprob": 0.0,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 222,
+        "logprob": 0.0,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 610,
+        "logprob": 0.0,
+        "special": false,
+        "text": "def"
+      },
+      {
+        "id": 1489,
+        "logprob": 0.0,
+        "special": false,
+        "text": " print"
+      },
+      {
+        "id": 100,
+        "logprob": 0.0,
+        "special": false,
+        "text": "_"
+      },
+      {
+        "id": 7670,
+        "logprob": 0.0,
+        "special": false,
+        "text": "hello"
+      },
+      {
+        "id": 100,
+        "logprob": 0.0,
+        "special": false,
+        "text": "_"
+      },
+      {
+        "id": 444,
+        "logprob": 0.0,
+        "special": false,
+        "text": "name"
+      },
+      {
+        "id": 100,
+        "logprob": 0.0,
+        "special": false,
+        "text": "_"
+      },
+      {
+        "id": 400,
+        "logprob": 0.0,
+        "special": false,
+        "text": "age"
+      },
+      {
+        "id": 45,
+        "logprob": 0.0,
+        "special": false,
+        "text": "("
+      },
+      {
+        "id": 444,
+        "logprob": 0.0,
+        "special": false,
+        "text": "name"
+      },
+      {
+        "id": 49,
+        "logprob": 0.0,
+        "special": false,
+        "text": ","
+      },
+      {
+        "id": 11505,
+        "logprob": 0.0,
+        "special": false,
+        "text": " age"
+      },
+      {
+        "id": 731,
+        "logprob": 0.0,
+        "special": false,
+        "text": "):"
+      },
+      {
+        "id": 303,
+        "logprob": 0.0,
+        "special": false,
+        "text": "\n   "
+      },
+      {
+        "id": 1489,
+        "logprob": 0.0,
+        "special": false,
+        "text": " print"
+      },
+      {
+        "id": 459,
+        "logprob": 0.0,
+        "special": false,
+        "text": "(\""
+      },
+      {
+        "id": 8302,
+        "logprob": 0.0,
+        "special": false,
+        "text": "Hello"
+      },
+      {
+        "id": 332,
+        "logprob": 0.0,
+        "special": false,
+        "text": " \""
+      },
+      {
+        "id": 494,
+        "logprob": 0.0,
+        "special": false,
+        "text": " +"
+      },
+      {
+        "id": 655,
+        "logprob": 0.0,
+        "special": false,
+        "text": " name"
+      },
+      {
+        "id": 494,
+        "logprob": 0.0,
+        "special": false,
+        "text": " +"
+      },
+      {
+        "id": 3021,
+        "logprob": -0.5761719,
+        "special": false,
+        "text": " \","
+      },
+      {
+        "id": 863,
+        "logprob": 0.0,
+        "special": false,
+        "text": " you"
+      },
+      {
+        "id": 904,
+        "logprob": 0.0,
+        "special": false,
+        "text": " are"
+      },
+      {
+        "id": 332,
+        "logprob": 0.0,
+        "special": false,
+        "text": " \""
+      },
+      {
+        "id": 494,
+        "logprob": 0.0,
+        "special": false,
+        "text": " +"
+      },
+      {
+        "id": 615,
+        "logprob": 0.0,
+        "special": false,
+        "text": " str"
+      },
+      {
+        "id": 45,
+        "logprob": 0.0,
+        "special": false,
+        "text": "("
+      },
+      {
+        "id": 400,
+        "logprob": 0.0,
+        "special": false,
+        "text": "age"
+      },
+      {
+        "id": 46,
+        "logprob": 0.0,
+        "special": false,
+        "text": ")"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "():\n    print(\"Hello World!\")\n\ndef print_hello_name(name):\n    print(\"Hello \" + name + \"!\")\n\ndef print_hello_name_age(name, age):\n    print(\"Hello \" + name + \", you are \" + str(age)"
+}
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_load.json b/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_load.json
new file mode 100644
index 00000000..9e82d4be
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder2/test_flash_starcoder2_load.json
@@ -0,0 +1,378 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 610,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1489,
+          "logprob": -5.2617188,
+          "text": " print"
+        },
+        {
+          "id": 100,
+          "logprob": -0.38476562,
+          "text": "_"
+        },
+        {
+          "id": 7670,
+          "logprob": -7.640625,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2284,
+          "logprob": -0.92626953,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 303,
+          "logprob": -0.40722656,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1489,
+          "logprob": -0.27954102,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 459,
+          "logprob": -0.6142578,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8302,
+          "logprob": -0.68310547,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10914,
+          "logprob": -1.4570312,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 16013,
+          "logprob": -0.80126953,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 222,
+          "logprob": -0.6303711,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 222,
+          "logprob": -0.23327637,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 610,
+          "logprob": -1.2304688,
+          "special": false,
+          "text": "def"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\ndef"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 610,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1489,
+          "logprob": -5.2617188,
+          "text": " print"
+        },
+        {
+          "id": 100,
+          "logprob": -0.38476562,
+          "text": "_"
+        },
+        {
+          "id": 7670,
+          "logprob": -7.640625,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2284,
+          "logprob": -0.92626953,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 303,
+          "logprob": -0.40722656,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1489,
+          "logprob": -0.27954102,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 459,
+          "logprob": -0.6142578,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8302,
+          "logprob": -0.68310547,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10914,
+          "logprob": -1.4570312,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 16013,
+          "logprob": -0.80126953,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 222,
+          "logprob": -0.6303711,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 222,
+          "logprob": -0.23327637,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 610,
+          "logprob": -1.2304688,
+          "special": false,
+          "text": "def"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\ndef"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 610,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1489,
+          "logprob": -5.2617188,
+          "text": " print"
+        },
+        {
+          "id": 100,
+          "logprob": -0.38476562,
+          "text": "_"
+        },
+        {
+          "id": 7670,
+          "logprob": -7.640625,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2284,
+          "logprob": -0.92626953,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 303,
+          "logprob": -0.40722656,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1489,
+          "logprob": -0.27954102,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 459,
+          "logprob": -0.6142578,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8302,
+          "logprob": -0.68310547,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10914,
+          "logprob": -1.4570312,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 16013,
+          "logprob": -0.80126953,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 222,
+          "logprob": -0.6303711,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 222,
+          "logprob": -0.23327637,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 610,
+          "logprob": -1.2304688,
+          "special": false,
+          "text": "def"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\ndef"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 610,
+          "logprob": null,
+          "text": "def"
+        },
+        {
+          "id": 1489,
+          "logprob": -5.2617188,
+          "text": " print"
+        },
+        {
+          "id": 100,
+          "logprob": -0.38476562,
+          "text": "_"
+        },
+        {
+          "id": 7670,
+          "logprob": -7.640625,
+          "text": "hello"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 2284,
+          "logprob": -0.92626953,
+          "special": false,
+          "text": "():"
+        },
+        {
+          "id": 303,
+          "logprob": -0.40722656,
+          "special": false,
+          "text": "\n   "
+        },
+        {
+          "id": 1489,
+          "logprob": -0.27954102,
+          "special": false,
+          "text": " print"
+        },
+        {
+          "id": 459,
+          "logprob": -0.6142578,
+          "special": false,
+          "text": "(\""
+        },
+        {
+          "id": 8302,
+          "logprob": -0.68310547,
+          "special": false,
+          "text": "Hello"
+        },
+        {
+          "id": 10914,
+          "logprob": -1.4570312,
+          "special": false,
+          "text": " World"
+        },
+        {
+          "id": 16013,
+          "logprob": -0.80126953,
+          "special": false,
+          "text": "!\")"
+        },
+        {
+          "id": 222,
+          "logprob": -0.6303711,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 222,
+          "logprob": -0.23327637,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 610,
+          "logprob": -1.2304688,
+          "special": false,
+          "text": "def"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "():\n    print(\"Hello World!\")\n\ndef"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json
index 53055e42..5e537bb7 100644
--- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq.json
@@ -1,193 +1,194 @@
 {
-  "generated_text": "\n    return sum(L) / len(L)\n\n\ndef geometric_mean(L",
   "details": {
     "best_of_sequences": null,
     "finish_reason": "length",
     "generated_tokens": 20,
-    "seed": null,
     "prefill": [
       {
         "id": 589,
-        "text": "def",
-        "logprob": null
+        "logprob": null,
+        "text": "def"
       },
       {
         "id": 3226,
-        "text": " ge",
-        "logprob": -9.0234375
+        "logprob": -8.5859375,
+        "text": " ge"
       },
       {
         "id": 21017,
-        "text": "ometric",
-        "logprob": -9.0859375
+        "logprob": -7.5859375,
+        "text": "ometric"
       },
       {
         "id": 81,
-        "text": "_",
-        "logprob": -0.25878906
+        "logprob": -0.2668457,
+        "text": "_"
       },
       {
         "id": 6009,
-        "text": "mean",
-        "logprob": -2.2109375
+        "logprob": -1.6416016,
+        "text": "mean"
       },
       {
         "id": 26,
-        "text": "(",
-        "logprob": -0.30371094
+        "logprob": -0.22705078,
+        "text": "("
       },
       {
         "id": 62,
-        "text": "L",
-        "logprob": -5.6054688
+        "logprob": -5.2304688,
+        "text": "L"
       },
       {
         "id": 44,
-        "text": ":",
-        "logprob": -3.0722656
+        "logprob": -3.0976562,
+        "text": ":"
       },
       {
         "id": 1682,
-        "text": " List",
-        "logprob": -0.6879883
+        "logprob": -1.1044922,
+        "text": " List"
       },
       {
         "id": 77,
-        "text": "[",
-        "logprob": -0.38500977
+        "logprob": -0.14294434,
+        "text": "["
       },
       {
         "id": 1808,
-        "text": "float",
-        "logprob": -0.984375
+        "logprob": -0.32299805,
+        "text": "float"
       },
       {
         "id": 10794,
-        "text": "]):",
-        "logprob": -2.5351562
+        "logprob": -2.8164062,
+        "text": "]):"
       }
     ],
+    "seed": null,
     "tokens": [
       {
         "id": 284,
-        "text": "\n   ",
-        "logprob": -1.1738281,
-        "special": false
+        "logprob": -0.1282959,
+        "special": false,
+        "text": "\n   "
       },
       {
-        "id": 442,
-        "text": " return",
-        "logprob": -0.95947266,
-        "special": false
+        "id": 1524,
+        "logprob": -0.97998047,
+        "special": false,
+        "text": " \"\"\""
       },
       {
-        "id": 3632,
-        "text": " sum",
-        "logprob": -1.4199219,
-        "special": false
+        "id": 284,
+        "logprob": -0.7006836,
+        "special": false,
+        "text": "\n   "
       },
       {
-        "id": 26,
-        "text": "(",
-        "logprob": -0.085876465,
-        "special": false
+        "id": 14883,
+        "logprob": -2.1933594,
+        "special": false,
+        "text": " Calculate"
       },
       {
-        "id": 62,
-        "text": "L",
-        "logprob": -0.09875488,
-        "special": false
-      },
-      {
-        "id": 27,
-        "text": ")",
-        "logprob": -0.30517578,
-        "special": false
-      },
-      {
-        "id": 517,
-        "text": " /",
-        "logprob": -0.42089844,
-        "special": false
-      },
-      {
-        "id": 2069,
-        "text": " len",
-        "logprob": -0.042053223,
-        "special": false
-      },
-      {
-        "id": 26,
-        "text": "(",
-        "logprob": -0.0011806488,
-        "special": false
-      },
-      {
-        "id": 62,
-        "text": "L",
-        "logprob": -0.0005259514,
-        "special": false
-      },
-      {
-        "id": 27,
-        "text": ")",
-        "logprob": -0.0017633438,
-        "special": false
-      },
-      {
-        "id": 478,
-        "text": "\n\n",
-        "logprob": -0.69189453,
-        "special": false
-      },
-      {
-        "id": 203,
-        "text": "\n",
-        "logprob": -0.041870117,
-        "special": false
-      },
-      {
-        "id": 589,
-        "text": "def",
-        "logprob": -0.27856445,
-        "special": false
+        "id": 322,
+        "logprob": -0.2697754,
+        "special": false,
+        "text": " the"
       },
       {
         "id": 3226,
-        "text": " ge",
-        "logprob": -1.7255859,
-        "special": false
+        "logprob": -0.0836792,
+        "special": false,
+        "text": " ge"
       },
       {
         "id": 21017,
-        "text": "ometric",
-        "logprob": -0.011291504,
-        "special": false
+        "logprob": -0.018737793,
+        "special": false,
+        "text": "ometric"
       },
       {
-        "id": 81,
-        "text": "_",
-        "logprob": -0.008430481,
-        "special": false
+        "id": 5651,
+        "logprob": -0.028640747,
+        "special": false,
+        "text": " mean"
       },
       {
-        "id": 6009,
-        "text": "mean",
-        "logprob": -0.025787354,
-        "special": false
+        "id": 432,
+        "logprob": -0.29467773,
+        "special": false,
+        "text": " of"
       },
       {
-        "id": 26,
-        "text": "(",
-        "logprob": -0.073913574,
-        "special": false
+        "id": 312,
+        "logprob": -0.31518555,
+        "special": false,
+        "text": " a"
       },
       {
-        "id": 62,
-        "text": "L",
-        "logprob": -0.09967041,
-        "special": false
+        "id": 1149,
+        "logprob": -0.20605469,
+        "special": false,
+        "text": " list"
+      },
+      {
+        "id": 432,
+        "logprob": -0.23254395,
+        "special": false,
+        "text": " of"
+      },
+      {
+        "id": 7515,
+        "logprob": -0.4489746,
+        "special": false,
+        "text": " numbers"
+      },
+      {
+        "id": 32,
+        "logprob": -0.6044922,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 446,
+        "logprob": -0.63964844,
+        "special": false,
+        "text": "\n\n   "
+      },
+      {
+        "id": 499,
+        "logprob": -1.1953125,
+        "special": false,
+        "text": " :"
+      },
+      {
+        "id": 753,
+        "logprob": -0.03515625,
+        "special": false,
+        "text": "param"
+      },
+      {
+        "id": 498,
+        "logprob": -0.06311035,
+        "special": false,
+        "text": " L"
+      },
+      {
+        "id": 44,
+        "logprob": -0.003414154,
+        "special": false,
+        "text": ":"
+      },
+      {
+        "id": 1682,
+        "logprob": -1.3310547,
+        "special": false,
+        "text": " List"
       }
-    ]
-  }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "\n    \"\"\"\n    Calculate the geometric mean of a list of numbers.\n\n    :param L: List"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json
index 5598a2ad..bf0f5146 100644
--- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_default_params.json
@@ -11,57 +11,57 @@
       },
       {
         "id": 3226,
-        "logprob": -9.0234375,
+        "logprob": -8.5859375,
         "text": " ge"
       },
       {
         "id": 21017,
-        "logprob": -9.09375,
+        "logprob": -7.5898438,
         "text": "ometric"
       },
       {
         "id": 81,
-        "logprob": -0.25976562,
+        "logprob": -0.26586914,
         "text": "_"
       },
       {
         "id": 6009,
-        "logprob": -2.2148438,
+        "logprob": -1.6347656,
         "text": "mean"
       },
       {
         "id": 26,
-        "logprob": -0.3010254,
+        "logprob": -0.22705078,
         "text": "("
       },
       {
         "id": 62,
-        "logprob": -5.6757812,
+        "logprob": -5.2382812,
         "text": "L"
       },
       {
         "id": 44,
-        "logprob": -3.0898438,
+        "logprob": -3.0996094,
         "text": ":"
       },
       {
         "id": 1682,
-        "logprob": -0.6791992,
+        "logprob": -1.1025391,
         "text": " List"
       },
       {
         "id": 77,
-        "logprob": -0.38891602,
+        "logprob": -0.14294434,
         "text": "["
       },
       {
         "id": 1808,
-        "logprob": -0.92041016,
+        "logprob": -0.32226562,
         "text": "float"
       },
       {
         "id": 10794,
-        "logprob": -2.5390625,
+        "logprob": -2.8164062,
         "text": "]):"
       }
     ],
@@ -75,13 +75,13 @@
       },
       {
         "id": 442,
-        "logprob": 0.0,
+        "logprob": -1.3134766,
         "special": false,
         "text": " return"
       },
       {
         "id": 11665,
-        "logprob": -1.6005859,
+        "logprob": -0.10021973,
         "special": false,
         "text": " reduce"
       },
@@ -129,7 +129,7 @@
       },
       {
         "id": 319,
-        "logprob": 0.0,
+        "logprob": -0.42871094,
         "special": false,
         "text": " *"
       },
@@ -158,36 +158,37 @@
         "text": ")"
       },
       {
-        "id": 203,
-        "logprob": -0.11968994,
-        "special": false,
-        "text": "\n"
-      },
-      {
-        "id": 203,
+        "id": 1115,
         "logprob": 0.0,
         "special": false,
-        "text": "\n"
+        "text": " **"
       },
       {
-        "id": 589,
+        "id": 308,
         "logprob": 0.0,
         "special": false,
-        "text": "def"
+        "text": " ("
       },
       {
-        "id": 3226,
+        "id": 35,
         "logprob": 0.0,
         "special": false,
-        "text": " ge"
+        "text": "1"
       },
       {
-        "id": 21017,
+        "id": 32,
+        "logprob": -0.31323242,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 34,
         "logprob": 0.0,
         "special": false,
-        "text": "ometric"
+        "text": "0"
       }
-    ]
+    ],
+    "top_tokens": null
   },
-  "generated_text": "\n    return reduce(lambda x, y: x * y, L)\n\ndef geometric"
+  "generated_text": "\n    return reduce(lambda x, y: x * y, L) ** (1.0"
 }
diff --git a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json
index 5381ce5a..46a21ed8 100644
--- a/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json
+++ b/integration-tests/models/__snapshots__/test_flash_starcoder_gptq/test_flash_starcoder_gptq_load.json
@@ -12,57 +12,57 @@
         },
         {
           "id": 3226,
-          "logprob": -9.0234375,
+          "logprob": -8.5859375,
           "text": " ge"
         },
         {
           "id": 21017,
-          "logprob": -9.0859375,
+          "logprob": -7.5820312,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.25927734,
+          "logprob": -0.26708984,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -2.25,
+          "logprob": -1.6386719,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.30126953,
+          "logprob": -0.22717285,
           "text": "("
         },
         {
           "id": 62,
-          "logprob": -5.7539062,
+          "logprob": -5.234375,
           "text": "L"
         },
         {
           "id": 44,
-          "logprob": -3.0878906,
+          "logprob": -3.1015625,
           "text": ":"
         },
         {
           "id": 1682,
-          "logprob": -0.6845703,
+          "logprob": -1.1083984,
           "text": " List"
         },
         {
           "id": 77,
-          "logprob": -0.3918457,
+          "logprob": -0.14294434,
           "text": "["
         },
         {
           "id": 1808,
-          "logprob": -0.8798828,
+          "logprob": -0.32592773,
           "text": "float"
         },
         {
           "id": 10794,
-          "logprob": -2.4980469,
+          "logprob": -2.8164062,
           "text": "]):"
         }
       ],
@@ -70,67 +70,68 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -1.1533203,
+          "logprob": -0.12817383,
           "special": false,
           "text": "\n   "
         },
         {
-          "id": 442,
-          "logprob": -0.91796875,
+          "id": 1524,
+          "logprob": -0.9863281,
           "special": false,
-          "text": " return"
+          "text": " \"\"\""
         },
         {
-          "id": 3632,
-          "logprob": -1.3291016,
+          "id": 284,
+          "logprob": -0.7011719,
           "special": false,
-          "text": " sum"
+          "text": "\n   "
         },
         {
-          "id": 26,
-          "logprob": -0.08062744,
+          "id": 14883,
+          "logprob": -2.2050781,
           "special": false,
-          "text": "("
+          "text": " Calculate"
         },
         {
-          "id": 62,
-          "logprob": -0.097717285,
+          "id": 322,
+          "logprob": -0.2668457,
           "special": false,
-          "text": "L"
+          "text": " the"
         },
         {
-          "id": 27,
-          "logprob": -0.29003906,
+          "id": 3226,
+          "logprob": -0.08465576,
           "special": false,
-          "text": ")"
+          "text": " ge"
         },
         {
-          "id": 517,
-          "logprob": -0.34958984,
+          "id": 21017,
+          "logprob": -0.019012451,
           "special": false,
-          "text": " /"
+          "text": "ometric"
         },
         {
-          "id": 2069,
-          "logprob": -0.03829956,
+          "id": 5651,
+          "logprob": -0.028625488,
           "special": false,
-          "text": " len"
+          "text": " mean"
         },
         {
-          "id": 26,
-          "logprob": -0.0011987686,
+          "id": 432,
+          "logprob": -0.29418945,
           "special": false,
-          "text": "("
+          "text": " of"
         },
         {
-          "id": 62,
-          "logprob": -0.00050878525,
+          "id": 312,
+          "logprob": -0.3161621,
           "special": false,
-          "text": "L"
+          "text": " a"
         }
-      ]
+      ],
+      "top_tokens": null
     },
-    "generated_text": "\n    return sum(L) / len(L"
+    "generated_text": "\n    \"\"\"\n    Calculate the geometric mean of a"
   },
   {
     "details": {
@@ -145,57 +146,57 @@
         },
         {
           "id": 3226,
-          "logprob": -9.0234375,
+          "logprob": -8.5859375,
           "text": " ge"
         },
         {
           "id": 21017,
-          "logprob": -9.0859375,
+          "logprob": -7.59375,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.25878906,
+          "logprob": -0.26953125,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -2.2109375,
+          "logprob": -1.640625,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.30371094,
+          "logprob": -0.22705078,
           "text": "("
         },
         {
           "id": 62,
-          "logprob": -5.6054688,
+          "logprob": -5.234375,
           "text": "L"
         },
         {
           "id": 44,
-          "logprob": -3.0722656,
+          "logprob": -3.1132812,
           "text": ":"
         },
         {
           "id": 1682,
-          "logprob": -0.6879883,
+          "logprob": -1.1123047,
           "text": " List"
         },
         {
           "id": 77,
-          "logprob": -0.38500977,
+          "logprob": -0.14294434,
           "text": "["
         },
         {
           "id": 1808,
-          "logprob": -0.984375,
+          "logprob": -0.32299805,
           "text": "float"
         },
         {
           "id": 10794,
-          "logprob": -2.5351562,
+          "logprob": -2.8164062,
           "text": "]):"
         }
       ],
@@ -203,67 +204,68 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -1.1738281,
+          "logprob": -0.12854004,
           "special": false,
           "text": "\n   "
         },
         {
-          "id": 442,
-          "logprob": -0.9584961,
+          "id": 1524,
+          "logprob": -0.9897461,
           "special": false,
-          "text": " return"
+          "text": " \"\"\""
         },
         {
-          "id": 3632,
-          "logprob": -1.4169922,
+          "id": 284,
+          "logprob": -0.69970703,
           "special": false,
-          "text": " sum"
+          "text": "\n   "
         },
         {
-          "id": 26,
-          "logprob": -0.085876465,
+          "id": 14883,
+          "logprob": -2.2050781,
           "special": false,
-          "text": "("
+          "text": " Calculate"
         },
         {
-          "id": 62,
-          "logprob": -0.0982666,
+          "id": 322,
+          "logprob": -0.2668457,
           "special": false,
-          "text": "L"
+          "text": " the"
         },
         {
-          "id": 27,
-          "logprob": -0.3022461,
+          "id": 3226,
+          "logprob": -0.08496094,
           "special": false,
-          "text": ")"
+          "text": " ge"
         },
         {
-          "id": 517,
-          "logprob": -0.40504883,
+          "id": 21017,
+          "logprob": -0.019012451,
           "special": false,
-          "text": " /"
+          "text": "ometric"
         },
         {
-          "id": 2069,
-          "logprob": -0.041656494,
+          "id": 5651,
+          "logprob": -0.029037476,
           "special": false,
-          "text": " len"
+          "text": " mean"
         },
         {
-          "id": 26,
-          "logprob": -0.0011844635,
+          "id": 432,
+          "logprob": -0.2939453,
           "special": false,
-          "text": "("
+          "text": " of"
         },
         {
-          "id": 62,
-          "logprob": -0.0005264282,
+          "id": 312,
+          "logprob": -0.31591797,
           "special": false,
-          "text": "L"
+          "text": " a"
         }
-      ]
+      ],
+      "top_tokens": null
     },
-    "generated_text": "\n    return sum(L) / len(L"
+    "generated_text": "\n    \"\"\"\n    Calculate the geometric mean of a"
   },
   {
     "details": {
@@ -278,57 +280,57 @@
         },
         {
           "id": 3226,
-          "logprob": -9.0234375,
+          "logprob": -8.5859375,
           "text": " ge"
         },
         {
           "id": 21017,
-          "logprob": -9.0859375,
+          "logprob": -7.5859375,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.25927734,
+          "logprob": -0.26586914,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -2.25,
+          "logprob": -1.6347656,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.30126953,
+          "logprob": -0.22766113,
           "text": "("
         },
         {
           "id": 62,
-          "logprob": -5.7539062,
+          "logprob": -5.2265625,
           "text": "L"
         },
         {
           "id": 44,
-          "logprob": -3.0878906,
+          "logprob": -3.0976562,
           "text": ":"
         },
         {
           "id": 1682,
-          "logprob": -0.6845703,
+          "logprob": -1.1025391,
           "text": " List"
         },
         {
           "id": 77,
-          "logprob": -0.3918457,
+          "logprob": -0.1427002,
           "text": "["
         },
         {
           "id": 1808,
-          "logprob": -0.8798828,
+          "logprob": -0.32592773,
           "text": "float"
         },
         {
           "id": 10794,
-          "logprob": -2.4980469,
+          "logprob": -2.8164062,
           "text": "]):"
         }
       ],
@@ -336,67 +338,68 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -1.1533203,
+          "logprob": -0.13012695,
           "special": false,
           "text": "\n   "
         },
         {
-          "id": 442,
-          "logprob": -0.9165039,
+          "id": 1524,
+          "logprob": -0.98046875,
           "special": false,
-          "text": " return"
+          "text": " \"\"\""
         },
         {
-          "id": 3632,
-          "logprob": -1.328125,
+          "id": 284,
+          "logprob": -0.69921875,
           "special": false,
-          "text": " sum"
+          "text": "\n   "
         },
         {
-          "id": 26,
-          "logprob": -0.07946777,
+          "id": 14883,
+          "logprob": -2.1992188,
           "special": false,
-          "text": "("
+          "text": " Calculate"
         },
         {
-          "id": 62,
-          "logprob": -0.09820557,
+          "id": 322,
+          "logprob": -0.2668457,
           "special": false,
-          "text": "L"
+          "text": " the"
         },
         {
-          "id": 27,
-          "logprob": -0.28930664,
+          "id": 3226,
+          "logprob": -0.083496094,
           "special": false,
-          "text": ")"
+          "text": " ge"
         },
         {
-          "id": 517,
-          "logprob": -0.34592773,
+          "id": 21017,
+          "logprob": -0.01902771,
           "special": false,
-          "text": " /"
+          "text": "ometric"
         },
         {
-          "id": 2069,
-          "logprob": -0.038330078,
+          "id": 5651,
+          "logprob": -0.029006958,
           "special": false,
-          "text": " len"
+          "text": " mean"
         },
         {
-          "id": 26,
-          "logprob": -0.0011940002,
+          "id": 432,
+          "logprob": -0.29248047,
           "special": false,
-          "text": "("
+          "text": " of"
         },
         {
-          "id": 62,
-          "logprob": -0.00050878525,
+          "id": 312,
+          "logprob": -0.3161621,
           "special": false,
-          "text": "L"
+          "text": " a"
         }
-      ]
+      ],
+      "top_tokens": null
     },
-    "generated_text": "\n    return sum(L) / len(L"
+    "generated_text": "\n    \"\"\"\n    Calculate the geometric mean of a"
   },
   {
     "details": {
@@ -411,57 +414,57 @@
         },
         {
           "id": 3226,
-          "logprob": -9.0234375,
+          "logprob": -8.5859375,
           "text": " ge"
         },
         {
           "id": 21017,
-          "logprob": -9.0859375,
+          "logprob": -7.5859375,
           "text": "ometric"
         },
         {
           "id": 81,
-          "logprob": -0.25927734,
+          "logprob": -0.26904297,
           "text": "_"
         },
         {
           "id": 6009,
-          "logprob": -2.25,
+          "logprob": -1.6386719,
           "text": "mean"
         },
         {
           "id": 26,
-          "logprob": -0.30126953,
+          "logprob": -0.22705078,
           "text": "("
         },
         {
           "id": 62,
-          "logprob": -5.7539062,
+          "logprob": -5.234375,
           "text": "L"
         },
         {
           "id": 44,
-          "logprob": -3.0878906,
+          "logprob": -3.1132812,
           "text": ":"
         },
         {
           "id": 1682,
-          "logprob": -0.6845703,
+          "logprob": -1.1074219,
           "text": " List"
         },
         {
           "id": 77,
-          "logprob": -0.3918457,
+          "logprob": -0.14477539,
           "text": "["
         },
         {
           "id": 1808,
-          "logprob": -0.8798828,
+          "logprob": -0.3256836,
           "text": "float"
         },
         {
           "id": 10794,
-          "logprob": -2.4980469,
+          "logprob": -2.8027344,
           "text": "]):"
         }
       ],
@@ -469,66 +472,67 @@
       "tokens": [
         {
           "id": 284,
-          "logprob": -1.1533203,
+          "logprob": -0.12915039,
           "special": false,
           "text": "\n   "
         },
         {
-          "id": 442,
-          "logprob": -0.91259766,
+          "id": 1524,
+          "logprob": -0.98535156,
           "special": false,
-          "text": " return"
+          "text": " \"\"\""
         },
         {
-          "id": 3632,
-          "logprob": -1.3251953,
+          "id": 284,
+          "logprob": -0.69921875,
           "special": false,
-          "text": " sum"
+          "text": "\n   "
         },
         {
-          "id": 26,
-          "logprob": -0.08062744,
+          "id": 14883,
+          "logprob": -2.2011719,
           "special": false,
-          "text": "("
+          "text": " Calculate"
         },
         {
-          "id": 62,
-          "logprob": -0.09906006,
+          "id": 322,
+          "logprob": -0.26708984,
           "special": false,
-          "text": "L"
+          "text": " the"
         },
         {
-          "id": 27,
-          "logprob": -0.28979492,
+          "id": 3226,
+          "logprob": -0.08502197,
           "special": false,
-          "text": ")"
+          "text": " ge"
         },
         {
-          "id": 517,
-          "logprob": -0.35958984,
+          "id": 21017,
+          "logprob": -0.019012451,
           "special": false,
-          "text": " /"
+          "text": "ometric"
         },
         {
-          "id": 2069,
-          "logprob": -0.038604736,
+          "id": 5651,
+          "logprob": -0.028625488,
           "special": false,
-          "text": " len"
+          "text": " mean"
         },
         {
-          "id": 26,
-          "logprob": -0.0011901855,
+          "id": 432,
+          "logprob": -0.29589844,
           "special": false,
-          "text": "("
+          "text": " of"
         },
         {
-          "id": 62,
-          "logprob": -0.0005078316,
+          "id": 312,
+          "logprob": -0.31591797,
           "special": false,
-          "text": "L"
+          "text": " a"
         }
-      ]
+      ],
+      "top_tokens": null
     },
-    "generated_text": "\n    return sum(L) / len(L"
+    "generated_text": "\n    \"\"\"\n    Calculate the geometric mean of a"
   }
 ]
diff --git a/integration-tests/models/__snapshots__/test_grammar_llama/test_non_flash_llama_grammar_json.json b/integration-tests/models/__snapshots__/test_grammar_llama/test_non_flash_llama_grammar_json.json
new file mode 100644
index 00000000..d7fb620d
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_grammar_llama/test_non_flash_llama_grammar_json.json
@@ -0,0 +1,274 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "eos_token",
+    "generated_tokens": 30,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 5235,
+        "logprob": -10.0625,
+        "text": "info"
+      },
+      {
+        "id": 29901,
+        "logprob": -3.2324219,
+        "text": ":"
+      },
+      {
+        "id": 13260,
+        "logprob": -10.625,
+        "text": "dav"
+      },
+      {
+        "id": 333,
+        "logprob": -0.08276367,
+        "text": "id"
+      },
+      {
+        "id": 8753,
+        "logprob": -7.5273438,
+        "text": "hol"
+      },
+      {
+        "id": 17559,
+        "logprob": -3.8476562,
+        "text": "tz"
+      },
+      {
+        "id": 763,
+        "logprob": -10.140625,
+        "text": "like"
+      },
+      {
+        "id": 10697,
+        "logprob": -10.1953125,
+        "text": "trees"
+      },
+      {
+        "id": 322,
+        "logprob": -2.5742188,
+        "text": "and"
+      },
+      {
+        "id": 756,
+        "logprob": -7.4882812,
+        "text": "has"
+      },
+      {
+        "id": 1023,
+        "logprob": -5.0507812,
+        "text": "two"
+      },
+      {
+        "id": 274,
+        "logprob": -5.3164062,
+        "text": "c"
+      },
+      {
+        "id": 1446,
+        "logprob": -0.6694336,
+        "text": "ats"
+      },
+      {
+        "id": 29889,
+        "logprob": -0.9995117,
+        "text": "."
+      },
+      {
+        "id": 29871,
+        "logprob": -4.2421875,
+        "text": ""
+      }
+    ],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 6377,
+        "logprob": -0.14916992,
+        "special": false,
+        "text": "{\""
+      },
+      {
+        "id": 29888,
+        "logprob": -0.13598633,
+        "special": false,
+        "text": "f"
+      },
+      {
+        "id": 12935,
+        "logprob": -0.017669678,
+        "special": false,
+        "text": "irs"
+      },
+      {
+        "id": 29873,
+        "logprob": -0.00085639954,
+        "special": false,
+        "text": "t"
+      },
+      {
+        "id": 1170,
+        "logprob": -0.0054016113,
+        "special": false,
+        "text": "Name"
+      },
+      {
+        "id": 4710,
+        "logprob": -0.13549805,
+        "special": false,
+        "text": "\":\""
+      },
+      {
+        "id": 19504,
+        "logprob": -0.8852539,
+        "special": false,
+        "text": "David"
+      },
+      {
+        "id": 3284,
+        "logprob": -0.16394043,
+        "special": false,
+        "text": "\",\""
+      },
+      {
+        "id": 29882,
+        "logprob": -0.08862305,
+        "special": false,
+        "text": "h"
+      },
+      {
+        "id": 711,
+        "logprob": -0.66259766,
+        "special": false,
+        "text": "ob"
+      },
+      {
+        "id": 1609,
+        "logprob": -5.51939e-05,
+        "special": false,
+        "text": "by"
+      },
+      {
+        "id": 4710,
+        "logprob": -0.23120117,
+        "special": false,
+        "text": "\":\""
+      },
+      {
+        "id": 29911,
+        "logprob": -2.3730469,
+        "special": false,
+        "text": "T"
+      },
+      {
+        "id": 11003,
+        "logprob": -0.032104492,
+        "special": false,
+        "text": "rees"
+      },
+      {
+        "id": 3284,
+        "logprob": -0.22021484,
+        "special": false,
+        "text": "\",\""
+      },
+      {
+        "id": 4230,
+        "logprob": -0.06726074,
+        "special": false,
+        "text": "last"
+      },
+      {
+        "id": 1170,
+        "logprob": -0.003501892,
+        "special": false,
+        "text": "Name"
+      },
+      {
+        "id": 4710,
+        "logprob": -0.0045661926,
+        "special": false,
+        "text": "\":\""
+      },
+      {
+        "id": 29950,
+        "logprob": -0.12512207,
+        "special": false,
+        "text": "H"
+      },
+      {
+        "id": 14339,
+        "logprob": -0.009552002,
+        "special": false,
+        "text": "olt"
+      },
+      {
+        "id": 29920,
+        "logprob": -0.00042438507,
+        "special": false,
+        "text": "z"
+      },
+      {
+        "id": 3284,
+        "logprob": -0.11651611,
+        "special": false,
+        "text": "\",\""
+      },
+      {
+        "id": 29876,
+        "logprob": -0.29736328,
+        "special": false,
+        "text": "n"
+      },
+      {
+        "id": 398,
+        "logprob": -0.003030777,
+        "special": false,
+        "text": "um"
+      },
+      {
+        "id": 29907,
+        "logprob": -0.3774414,
+        "special": false,
+        "text": "C"
+      },
+      {
+        "id": 1446,
+        "logprob": -0.0003130436,
+        "special": false,
+        "text": "ats"
+      },
+      {
+        "id": 1115,
+        "logprob": -0.0021514893,
+        "special": false,
+        "text": "\":"
+      },
+      {
+        "id": 29906,
+        "logprob": -0.071899414,
+        "special": false,
+        "text": "2"
+      },
+      {
+        "id": 29913,
+        "logprob": -0.018997192,
+        "special": false,
+        "text": "}"
+      },
+      {
+        "id": 2,
+        "logprob": 0.0,
+        "special": true,
+        "text": "</s>"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "{\"firstName\":\"David\",\"hobby\":\"Trees\",\"lastName\":\"Holtz\",\"numCats\":2}"
+}
diff --git a/integration-tests/models/__snapshots__/test_idefics/test_idefics.json b/integration-tests/models/__snapshots__/test_idefics/test_idefics.json
index 2c5d05f6..90fb6dcc 100644
--- a/integration-tests/models/__snapshots__/test_idefics/test_idefics.json
+++ b/integration-tests/models/__snapshots__/test_idefics/test_idefics.json
@@ -11,92 +11,92 @@
       },
       {
         "id": 4911,
-        "logprob": -5.7851562,
+        "logprob": -6.9765625,
         "text": "User"
       },
       {
         "id": 29901,
-        "logprob": -0.006996155,
+        "logprob": -0.0059432983,
         "text": ":"
       },
       {
         "id": 32000,
-        "logprob": -0.81347656,
+        "logprob": -0.8408203,
         "text": "<fake_token_around_image>"
       },
       {
         "id": 32001,
-        "logprob": -6.687641e-05,
+        "logprob": -9.906292e-05,
         "text": "<image>"
       },
       {
         "id": 32000,
-        "logprob": -3.5762787e-07,
+        "logprob": -2.3841858e-07,
         "text": "<fake_token_around_image>"
       },
       {
         "id": 1815,
-        "logprob": -4.2148438,
+        "logprob": -4.1679688,
         "text": "Can"
       },
       {
         "id": 366,
-        "logprob": -0.014137268,
+        "logprob": -0.014099121,
         "text": "you"
       },
       {
         "id": 2649,
-        "logprob": -4.4335938,
+        "logprob": -4.4609375,
         "text": "tell"
       },
       {
         "id": 592,
-        "logprob": -0.2919922,
+        "logprob": -0.29882812,
         "text": "me"
       },
       {
         "id": 263,
-        "logprob": -4.2070312,
+        "logprob": -4.1445312,
         "text": "a"
       },
       {
         "id": 1407,
-        "logprob": -9.421875,
+        "logprob": -9.3828125,
         "text": "very"
       },
       {
         "id": 3273,
-        "logprob": -1.8720703,
+        "logprob": -1.9736328,
         "text": "short"
       },
       {
         "id": 5828,
-        "logprob": -0.26489258,
+        "logprob": -0.2800293,
         "text": "story"
       },
       {
         "id": 2729,
-        "logprob": -3.7441406,
+        "logprob": -3.5625,
         "text": "based"
       },
       {
         "id": 373,
-        "logprob": -0.0005393028,
+        "logprob": -0.0006427765,
         "text": "on"
       },
       {
         "id": 278,
-        "logprob": -0.140625,
+        "logprob": -0.13952637,
         "text": "the"
       },
       {
         "id": 1967,
-        "logprob": -0.06756592,
+        "logprob": -0.068115234,
         "text": "image"
       },
       {
         "id": 29973,
-        "logprob": -0.15454102,
+        "logprob": -0.16357422,
         "text": "?"
       }
     ],
@@ -104,25 +104,25 @@
     "tokens": [
       {
         "id": 32002,
-        "logprob": -0.0019140244,
+        "logprob": -0.0026474,
         "special": true,
         "text": "<end_of_utterance>"
       },
       {
         "id": 29871,
-        "logprob": -8.404255e-05,
+        "logprob": -8.547306e-05,
         "special": false,
         "text": " "
       },
       {
         "id": 13,
-        "logprob": -1.7642975e-05,
+        "logprob": -1.7881393e-05,
         "special": false,
         "text": "\n"
       },
       {
         "id": 7900,
-        "logprob": -2.9802322e-06,
+        "logprob": -3.0994415e-06,
         "special": false,
         "text": "Ass"
       },
@@ -140,30 +140,29 @@
       },
       {
         "id": 319,
-        "logprob": -0.91064453,
+        "logprob": -0.92529297,
         "special": false,
         "text": " A"
       },
       {
         "id": 696,
-        "logprob": -1.2412109,
+        "logprob": -1.1269531,
         "special": false,
         "text": " ro"
       },
       {
         "id": 15664,
-        "logprob": -0.0002439022,
+        "logprob": -0.00029492378,
         "special": false,
         "text": "oster"
       },
       {
         "id": 15028,
-        "logprob": -1.1630859,
+        "logprob": -1.1855469,
         "special": false,
         "text": " stands"
       }
-    ],
-    "top_tokens": null
+    ]
   },
   "generated_text": " \nAssistant: A rooster stands"
 }
diff --git a/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json b/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json
index f258e38d..21d6161b 100644
--- a/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json
+++ b/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json
@@ -12,92 +12,92 @@
         },
         {
           "id": 4911,
-          "logprob": -5.7851562,
+          "logprob": -6.9804688,
           "text": "User"
         },
         {
           "id": 29901,
-          "logprob": -0.006996155,
+          "logprob": -0.006122589,
           "text": ":"
         },
         {
           "id": 32000,
-          "logprob": -0.81347656,
+          "logprob": -0.8417969,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 32001,
-          "logprob": -6.687641e-05,
+          "logprob": -9.918213e-05,
           "text": "<image>"
         },
         {
           "id": 32000,
-          "logprob": -3.5762787e-07,
+          "logprob": -2.3841858e-07,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 1815,
-          "logprob": -4.2148438,
+          "logprob": -4.1679688,
           "text": "Can"
         },
         {
           "id": 366,
-          "logprob": -0.014137268,
+          "logprob": -0.014091492,
           "text": "you"
         },
         {
           "id": 2649,
-          "logprob": -4.4335938,
+          "logprob": -4.4726562,
           "text": "tell"
         },
         {
           "id": 592,
-          "logprob": -0.2919922,
+          "logprob": -0.2998047,
           "text": "me"
         },
         {
           "id": 263,
-          "logprob": -4.2070312,
+          "logprob": -4.15625,
           "text": "a"
         },
         {
           "id": 1407,
-          "logprob": -9.421875,
+          "logprob": -9.3828125,
           "text": "very"
         },
         {
           "id": 3273,
-          "logprob": -1.8720703,
+          "logprob": -1.9716797,
           "text": "short"
         },
         {
           "id": 5828,
-          "logprob": -0.26489258,
+          "logprob": -0.27734375,
           "text": "story"
         },
         {
           "id": 2729,
-          "logprob": -3.7441406,
+          "logprob": -3.5605469,
           "text": "based"
         },
         {
           "id": 373,
-          "logprob": -0.0005393028,
+          "logprob": -0.00064468384,
           "text": "on"
         },
         {
           "id": 278,
-          "logprob": -0.140625,
+          "logprob": -0.14160156,
           "text": "the"
         },
         {
           "id": 1967,
-          "logprob": -0.06756592,
+          "logprob": -0.06915283,
           "text": "image"
         },
         {
           "id": 29973,
-          "logprob": -0.15454102,
+          "logprob": -0.16381836,
           "text": "?"
         }
       ],
@@ -105,19 +105,19 @@
       "tokens": [
         {
           "id": 32002,
-          "logprob": -0.0019140244,
+          "logprob": -0.0026664734,
           "special": true,
           "text": "<end_of_utterance>"
         },
         {
           "id": 29871,
-          "logprob": -8.392334e-05,
+          "logprob": -8.583069e-05,
           "special": false,
           "text": " "
         },
         {
           "id": 13,
-          "logprob": -1.7881393e-05,
+          "logprob": -1.8119812e-05,
           "special": false,
           "text": "\n"
         },
@@ -135,36 +135,35 @@
         },
         {
           "id": 29901,
-          "logprob": -3.0994415e-06,
+          "logprob": -3.2186508e-06,
           "special": false,
           "text": ":"
         },
         {
           "id": 319,
-          "logprob": -0.9057617,
+          "logprob": -0.9301758,
           "special": false,
           "text": " A"
         },
         {
           "id": 696,
-          "logprob": -1.2294922,
+          "logprob": -1.1279297,
           "special": false,
           "text": " ro"
         },
         {
           "id": 15664,
-          "logprob": -0.00024533272,
+          "logprob": -0.0002939701,
           "special": false,
           "text": "oster"
         },
         {
           "id": 15028,
-          "logprob": -1.1640625,
+          "logprob": -1.1865234,
           "special": false,
           "text": " stands"
         }
-      ],
-      "top_tokens": null
+      ]
     },
     "generated_text": " \nAssistant: A rooster stands"
   },
@@ -181,92 +180,92 @@
         },
         {
           "id": 4911,
-          "logprob": -5.7773438,
+          "logprob": -6.9804688,
           "text": "User"
         },
         {
           "id": 29901,
-          "logprob": -0.0070114136,
+          "logprob": -0.006122589,
           "text": ":"
         },
         {
           "id": 32000,
-          "logprob": -0.8208008,
+          "logprob": -0.8417969,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 32001,
-          "logprob": -6.699562e-05,
+          "logprob": -9.942055e-05,
           "text": "<image>"
         },
         {
           "id": 32000,
-          "logprob": -3.5762787e-07,
+          "logprob": -2.3841858e-07,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 1815,
-          "logprob": -4.2265625,
+          "logprob": -4.1679688,
           "text": "Can"
         },
         {
           "id": 366,
-          "logprob": -0.014175415,
+          "logprob": -0.014091492,
           "text": "you"
         },
         {
           "id": 2649,
-          "logprob": -4.4296875,
+          "logprob": -4.4726562,
           "text": "tell"
         },
         {
           "id": 592,
-          "logprob": -0.29516602,
+          "logprob": -0.2998047,
           "text": "me"
         },
         {
           "id": 263,
-          "logprob": -4.2109375,
+          "logprob": -4.15625,
           "text": "a"
         },
         {
           "id": 1407,
-          "logprob": -9.4296875,
+          "logprob": -9.3828125,
           "text": "very"
         },
         {
           "id": 3273,
-          "logprob": -1.8720703,
+          "logprob": -1.9716797,
           "text": "short"
         },
         {
           "id": 5828,
-          "logprob": -0.26879883,
+          "logprob": -0.27734375,
           "text": "story"
         },
         {
           "id": 2729,
-          "logprob": -3.7675781,
+          "logprob": -3.5605469,
           "text": "based"
         },
         {
           "id": 373,
-          "logprob": -0.0005354881,
+          "logprob": -0.0006451607,
           "text": "on"
         },
         {
           "id": 278,
-          "logprob": -0.13671875,
+          "logprob": -0.14160156,
           "text": "the"
         },
         {
           "id": 1967,
-          "logprob": -0.06719971,
+          "logprob": -0.06915283,
           "text": "image"
         },
         {
           "id": 29973,
-          "logprob": -0.15551758,
+          "logprob": -0.16381836,
           "text": "?"
         }
       ],
@@ -274,19 +273,19 @@
       "tokens": [
         {
           "id": 32002,
-          "logprob": -0.0019130707,
+          "logprob": -0.0026664734,
           "special": true,
           "text": "<end_of_utterance>"
         },
         {
           "id": 29871,
-          "logprob": -8.392334e-05,
+          "logprob": -8.571148e-05,
           "special": false,
           "text": " "
         },
         {
           "id": 13,
-          "logprob": -1.7881393e-05,
+          "logprob": -1.8119812e-05,
           "special": false,
           "text": "\n"
         },
@@ -310,30 +309,29 @@
         },
         {
           "id": 319,
-          "logprob": -0.9013672,
+          "logprob": -0.9301758,
           "special": false,
           "text": " A"
         },
         {
           "id": 696,
-          "logprob": -1.2324219,
+          "logprob": -1.1279297,
           "special": false,
           "text": " ro"
         },
         {
           "id": 15664,
-          "logprob": -0.0002477169,
+          "logprob": -0.0002939701,
           "special": false,
           "text": "oster"
         },
         {
           "id": 15028,
-          "logprob": -1.1660156,
+          "logprob": -1.1865234,
           "special": false,
           "text": " stands"
         }
-      ],
-      "top_tokens": null
+      ]
     },
     "generated_text": " \nAssistant: A rooster stands"
   },
@@ -350,92 +348,92 @@
         },
         {
           "id": 4911,
-          "logprob": -5.7773438,
+          "logprob": -6.9804688,
           "text": "User"
         },
         {
           "id": 29901,
-          "logprob": -0.0070114136,
+          "logprob": -0.006122589,
           "text": ":"
         },
         {
           "id": 32000,
-          "logprob": -0.8208008,
+          "logprob": -0.8417969,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 32001,
-          "logprob": -6.699562e-05,
+          "logprob": -9.918213e-05,
           "text": "<image>"
         },
         {
           "id": 32000,
-          "logprob": -3.5762787e-07,
+          "logprob": -2.3841858e-07,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 1815,
-          "logprob": -4.2265625,
+          "logprob": -4.1679688,
           "text": "Can"
         },
         {
           "id": 366,
-          "logprob": -0.014175415,
+          "logprob": -0.014091492,
           "text": "you"
         },
         {
           "id": 2649,
-          "logprob": -4.4296875,
+          "logprob": -4.4726562,
           "text": "tell"
         },
         {
           "id": 592,
-          "logprob": -0.29516602,
+          "logprob": -0.2998047,
           "text": "me"
         },
         {
           "id": 263,
-          "logprob": -4.2109375,
+          "logprob": -4.15625,
           "text": "a"
         },
         {
           "id": 1407,
-          "logprob": -9.4296875,
+          "logprob": -9.3828125,
           "text": "very"
         },
         {
           "id": 3273,
-          "logprob": -1.8720703,
+          "logprob": -1.9716797,
           "text": "short"
         },
         {
           "id": 5828,
-          "logprob": -0.26879883,
+          "logprob": -0.27734375,
           "text": "story"
         },
         {
           "id": 2729,
-          "logprob": -3.7675781,
+          "logprob": -3.5605469,
           "text": "based"
         },
         {
           "id": 373,
-          "logprob": -0.0005354881,
+          "logprob": -0.00064468384,
           "text": "on"
         },
         {
           "id": 278,
-          "logprob": -0.13671875,
+          "logprob": -0.14160156,
           "text": "the"
         },
         {
           "id": 1967,
-          "logprob": -0.06719971,
+          "logprob": -0.06915283,
           "text": "image"
         },
         {
           "id": 29973,
-          "logprob": -0.15551758,
+          "logprob": -0.16381836,
           "text": "?"
         }
       ],
@@ -443,19 +441,19 @@
       "tokens": [
         {
           "id": 32002,
-          "logprob": -0.001912117,
+          "logprob": -0.0026664734,
           "special": true,
           "text": "<end_of_utterance>"
         },
         {
           "id": 29871,
-          "logprob": -8.392334e-05,
+          "logprob": -8.59499e-05,
           "special": false,
           "text": " "
         },
         {
           "id": 13,
-          "logprob": -1.7762184e-05,
+          "logprob": -1.8119812e-05,
           "special": false,
           "text": "\n"
         },
@@ -479,30 +477,29 @@
         },
         {
           "id": 319,
-          "logprob": -0.9013672,
+          "logprob": -0.9301758,
           "special": false,
           "text": " A"
         },
         {
           "id": 696,
-          "logprob": -1.2324219,
+          "logprob": -1.1279297,
           "special": false,
           "text": " ro"
         },
         {
           "id": 15664,
-          "logprob": -0.0002477169,
+          "logprob": -0.0002939701,
           "special": false,
           "text": "oster"
         },
         {
           "id": 15028,
-          "logprob": -1.1660156,
+          "logprob": -1.1865234,
           "special": false,
           "text": " stands"
         }
-      ],
-      "top_tokens": null
+      ]
     },
     "generated_text": " \nAssistant: A rooster stands"
   },
@@ -519,92 +516,92 @@
         },
         {
           "id": 4911,
-          "logprob": -5.7773438,
+          "logprob": -6.9804688,
           "text": "User"
         },
         {
           "id": 29901,
-          "logprob": -0.0070114136,
+          "logprob": -0.006122589,
           "text": ":"
         },
         {
           "id": 32000,
-          "logprob": -0.8208008,
+          "logprob": -0.8417969,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 32001,
-          "logprob": -6.699562e-05,
+          "logprob": -9.942055e-05,
           "text": "<image>"
         },
         {
           "id": 32000,
-          "logprob": -3.5762787e-07,
+          "logprob": -2.3841858e-07,
           "text": "<fake_token_around_image>"
         },
         {
           "id": 1815,
-          "logprob": -4.2265625,
+          "logprob": -4.1679688,
           "text": "Can"
         },
         {
           "id": 366,
-          "logprob": -0.014175415,
+          "logprob": -0.014091492,
           "text": "you"
         },
         {
           "id": 2649,
-          "logprob": -4.4296875,
+          "logprob": -4.4726562,
           "text": "tell"
         },
         {
           "id": 592,
-          "logprob": -0.29516602,
+          "logprob": -0.2998047,
           "text": "me"
         },
         {
           "id": 263,
-          "logprob": -4.2109375,
+          "logprob": -4.15625,
           "text": "a"
         },
         {
           "id": 1407,
-          "logprob": -9.4296875,
+          "logprob": -9.3828125,
           "text": "very"
         },
         {
           "id": 3273,
-          "logprob": -1.8720703,
+          "logprob": -1.9716797,
           "text": "short"
         },
         {
           "id": 5828,
-          "logprob": -0.26879883,
+          "logprob": -0.27734375,
           "text": "story"
         },
         {
           "id": 2729,
-          "logprob": -3.7675781,
+          "logprob": -3.5605469,
           "text": "based"
         },
         {
           "id": 373,
-          "logprob": -0.0005354881,
+          "logprob": -0.0006451607,
           "text": "on"
         },
         {
           "id": 278,
-          "logprob": -0.13671875,
+          "logprob": -0.14160156,
           "text": "the"
         },
         {
           "id": 1967,
-          "logprob": -0.06719971,
+          "logprob": -0.06915283,
           "text": "image"
         },
         {
           "id": 29973,
-          "logprob": -0.15551758,
+          "logprob": -0.16381836,
           "text": "?"
         }
       ],
@@ -612,19 +609,19 @@
       "tokens": [
         {
           "id": 32002,
-          "logprob": -0.001912117,
+          "logprob": -0.0026664734,
           "special": true,
           "text": "<end_of_utterance>"
         },
         {
           "id": 29871,
-          "logprob": -8.392334e-05,
+          "logprob": -8.571148e-05,
           "special": false,
           "text": " "
         },
         {
           "id": 13,
-          "logprob": -1.7762184e-05,
+          "logprob": -1.8119812e-05,
           "special": false,
           "text": "\n"
         },
@@ -648,30 +645,29 @@
         },
         {
           "id": 319,
-          "logprob": -0.9013672,
+          "logprob": -0.9301758,
           "special": false,
           "text": " A"
         },
         {
           "id": 696,
-          "logprob": -1.2324219,
+          "logprob": -1.1279297,
           "special": false,
           "text": " ro"
         },
         {
           "id": 15664,
-          "logprob": -0.0002477169,
+          "logprob": -0.0002939701,
           "special": false,
           "text": "oster"
         },
         {
           "id": 15028,
-          "logprob": -1.1660156,
+          "logprob": -1.1865234,
           "special": false,
           "text": " stands"
         }
-      ],
-      "top_tokens": null
+      ]
     },
     "generated_text": " \nAssistant: A rooster stands"
   }
diff --git a/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_all_params.json b/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_all_params.json
new file mode 100644
index 00000000..e9d3e5ef
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_all_params.json
@@ -0,0 +1,65 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "stop_sequence",
+    "generated_tokens": 6,
+    "prefill": [
+      {
+        "id": 1,
+        "logprob": null,
+        "text": "<s>"
+      },
+      {
+        "id": 3735,
+        "logprob": -10.5,
+        "text": "Test"
+      },
+      {
+        "id": 2159,
+        "logprob": -12.140625,
+        "text": "request"
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 13,
+        "logprob": -1.0654297,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 1014,
+        "logprob": -2.7460938,
+        "special": false,
+        "text": "The"
+      },
+      {
+        "id": 6032,
+        "logprob": -1.359375,
+        "special": false,
+        "text": " purpose"
+      },
+      {
+        "id": 302,
+        "logprob": 0.0,
+        "special": false,
+        "text": " of"
+      },
+      {
+        "id": 456,
+        "logprob": 0.0,
+        "special": false,
+        "text": " this"
+      },
+      {
+        "id": 1369,
+        "logprob": -0.40063477,
+        "special": false,
+        "text": " test"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "Test request\nThe purpose of this test"
+}
diff --git a/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_load.json b/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_load.json
new file mode 100644
index 00000000..76b0154c
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_load.json
@@ -0,0 +1,59178 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1247,
+          "logprob": -2.3886719,
+          "text": "User"
+        },
+        {
+          "id": 28747,
+          "logprob": -12.328125,
+          "text": ":"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -19.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -19.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 2418,
+          "logprob": -19.0625,
+          "text": "Can"
+        },
+        {
+          "id": 368,
+          "logprob": -0.19799805,
+          "text": "you"
+        },
+        {
+          "id": 1912,
+          "logprob": -1.5029297,
+          "text": "tell"
+        },
+        {
+          "id": 528,
+          "logprob": -0.30932617,
+          "text": "me"
+        },
+        {
+          "id": 264,
+          "logprob": -2.6328125,
+          "text": "a"
+        },
+        {
+          "id": 1215,
+          "logprob": -9.1015625,
+          "text": "very"
+        },
+        {
+          "id": 2485,
+          "logprob": -0.99853516,
+          "text": "short"
+        },
+        {
+          "id": 2838,
+          "logprob": -0.4609375,
+          "text": "story"
+        },
+        {
+          "id": 2818,
+          "logprob": -3.3144531,
+          "text": "based"
+        },
+        {
+          "id": 356,
+          "logprob": -0.0289917,
+          "text": "on"
+        },
+        {
+          "id": 272,
+          "logprob": -0.9902344,
+          "text": "the"
+        },
+        {
+          "id": 3469,
+          "logprob": -0.28955078,
+          "text": "image"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.43188477,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -0.0075035095,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 13,
+          "logprob": -0.20129395,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 16114,
+          "logprob": -1.2607422,
+          "special": false,
+          "text": "Once"
+        },
+        {
+          "id": 3714,
+          "logprob": -0.20825195,
+          "special": false,
+          "text": " upon"
+        },
+        {
+          "id": 264,
+          "logprob": -0.0017719269,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 727,
+          "logprob": -0.011932373,
+          "special": false,
+          "text": " time"
+        },
+        {
+          "id": 28725,
+          "logprob": -0.17297363,
+          "special": false,
+          "text": ","
+        },
+        {
+          "id": 736,
+          "logprob": -0.91015625,
+          "special": false,
+          "text": " there"
+        },
+        {
+          "id": 403,
+          "logprob": -0.05758667,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 264,
+          "logprob": -0.00969696,
+          "special": false,
+          "text": " a"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n\nOnce upon a time, there was a"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1247,
+          "logprob": -2.3886719,
+          "text": "User"
+        },
+        {
+          "id": 28747,
+          "logprob": -12.328125,
+          "text": ":"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -19.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -19.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 2418,
+          "logprob": -19.078125,
+          "text": "Can"
+        },
+        {
+          "id": 368,
+          "logprob": -0.19665527,
+          "text": "you"
+        },
+        {
+          "id": 1912,
+          "logprob": -1.5009766,
+          "text": "tell"
+        },
+        {
+          "id": 528,
+          "logprob": -0.31054688,
+          "text": "me"
+        },
+        {
+          "id": 264,
+          "logprob": -2.6269531,
+          "text": "a"
+        },
+        {
+          "id": 1215,
+          "logprob": -9.1015625,
+          "text": "very"
+        },
+        {
+          "id": 2485,
+          "logprob": -0.99365234,
+          "text": "short"
+        },
+        {
+          "id": 2838,
+          "logprob": -0.45996094,
+          "text": "story"
+        },
+        {
+          "id": 2818,
+          "logprob": -3.3183594,
+          "text": "based"
+        },
+        {
+          "id": 356,
+          "logprob": -0.029006958,
+          "text": "on"
+        },
+        {
+          "id": 272,
+          "logprob": -0.9897461,
+          "text": "the"
+        },
+        {
+          "id": 3469,
+          "logprob": -0.29125977,
+          "text": "image"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.43017578,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -0.007446289,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 13,
+          "logprob": -0.20129395,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 16114,
+          "logprob": -1.2587891,
+          "special": false,
+          "text": "Once"
+        },
+        {
+          "id": 3714,
+          "logprob": -0.20825195,
+          "special": false,
+          "text": " upon"
+        },
+        {
+          "id": 264,
+          "logprob": -0.0017786026,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 727,
+          "logprob": -0.011955261,
+          "special": false,
+          "text": " time"
+        },
+        {
+          "id": 28725,
+          "logprob": -0.17297363,
+          "special": false,
+          "text": ","
+        },
+        {
+          "id": 736,
+          "logprob": -0.91015625,
+          "special": false,
+          "text": " there"
+        },
+        {
+          "id": 403,
+          "logprob": -0.05758667,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 264,
+          "logprob": -0.009544373,
+          "special": false,
+          "text": " a"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n\nOnce upon a time, there was a"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1247,
+          "logprob": -2.3886719,
+          "text": "User"
+        },
+        {
+          "id": 28747,
+          "logprob": -12.328125,
+          "text": ":"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -19.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -19.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 2418,
+          "logprob": -19.078125,
+          "text": "Can"
+        },
+        {
+          "id": 368,
+          "logprob": -0.19665527,
+          "text": "you"
+        },
+        {
+          "id": 1912,
+          "logprob": -1.5009766,
+          "text": "tell"
+        },
+        {
+          "id": 528,
+          "logprob": -0.31054688,
+          "text": "me"
+        },
+        {
+          "id": 264,
+          "logprob": -2.6269531,
+          "text": "a"
+        },
+        {
+          "id": 1215,
+          "logprob": -9.1015625,
+          "text": "very"
+        },
+        {
+          "id": 2485,
+          "logprob": -0.99365234,
+          "text": "short"
+        },
+        {
+          "id": 2838,
+          "logprob": -0.45996094,
+          "text": "story"
+        },
+        {
+          "id": 2818,
+          "logprob": -3.3183594,
+          "text": "based"
+        },
+        {
+          "id": 356,
+          "logprob": -0.029006958,
+          "text": "on"
+        },
+        {
+          "id": 272,
+          "logprob": -0.9897461,
+          "text": "the"
+        },
+        {
+          "id": 3469,
+          "logprob": -0.29125977,
+          "text": "image"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.43017578,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -0.007446289,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 13,
+          "logprob": -0.20129395,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 16114,
+          "logprob": -1.2587891,
+          "special": false,
+          "text": "Once"
+        },
+        {
+          "id": 3714,
+          "logprob": -0.20825195,
+          "special": false,
+          "text": " upon"
+        },
+        {
+          "id": 264,
+          "logprob": -0.0017786026,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 727,
+          "logprob": -0.011955261,
+          "special": false,
+          "text": " time"
+        },
+        {
+          "id": 28725,
+          "logprob": -0.17297363,
+          "special": false,
+          "text": ","
+        },
+        {
+          "id": 736,
+          "logprob": -0.91015625,
+          "special": false,
+          "text": " there"
+        },
+        {
+          "id": 403,
+          "logprob": -0.05758667,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 264,
+          "logprob": -0.009544373,
+          "special": false,
+          "text": " a"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n\nOnce upon a time, there was a"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1,
+          "logprob": null,
+          "text": "<s>"
+        },
+        {
+          "id": 1247,
+          "logprob": -2.3886719,
+          "text": "User"
+        },
+        {
+          "id": 28747,
+          "logprob": -12.328125,
+          "text": ":"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.09375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -9.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -19.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.78125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.0,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.40625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.71875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.8359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.53125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.4296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.2109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.15625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -18.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4921875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.1171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.46875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.4375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -19.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5234375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6171875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.359375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.1875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.0703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3046875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.609375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.96875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.2578125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3671875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -10.9765625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.21875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.25,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.2265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.953125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.90625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.65625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.296875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.9140625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.75,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0859375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.5625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.8828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.6640625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.6875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.890625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.4453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7265625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.84375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.734375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.3515625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.015625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.5703125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.796875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.3203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.8984375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.34375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.1328125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.1484375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.0078125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.0390625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -14.421875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.03125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.8203125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.546875,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -13.3828125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.28125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -17.59375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -11.9453125,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.7109375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -15.9375,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -16.625,
+          "text": "<image>"
+        },
+        {
+          "id": 32000,
+          "logprob": -12.5,
+          "text": "<image>"
+        },
+        {
+          "id": 2418,
+          "logprob": -19.078125,
+          "text": "Can"
+        },
+        {
+          "id": 368,
+          "logprob": -0.19665527,
+          "text": "you"
+        },
+        {
+          "id": 1912,
+          "logprob": -1.5009766,
+          "text": "tell"
+        },
+        {
+          "id": 528,
+          "logprob": -0.31054688,
+          "text": "me"
+        },
+        {
+          "id": 264,
+          "logprob": -2.6269531,
+          "text": "a"
+        },
+        {
+          "id": 1215,
+          "logprob": -9.1015625,
+          "text": "very"
+        },
+        {
+          "id": 2485,
+          "logprob": -0.99365234,
+          "text": "short"
+        },
+        {
+          "id": 2838,
+          "logprob": -0.45996094,
+          "text": "story"
+        },
+        {
+          "id": 2818,
+          "logprob": -3.3183594,
+          "text": "based"
+        },
+        {
+          "id": 356,
+          "logprob": -0.029006958,
+          "text": "on"
+        },
+        {
+          "id": 272,
+          "logprob": -0.9897461,
+          "text": "the"
+        },
+        {
+          "id": 3469,
+          "logprob": -0.29125977,
+          "text": "image"
+        },
+        {
+          "id": 28804,
+          "logprob": -0.43017578,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 13,
+          "logprob": -0.007446289,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 13,
+          "logprob": -0.20129395,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 16114,
+          "logprob": -1.2587891,
+          "special": false,
+          "text": "Once"
+        },
+        {
+          "id": 3714,
+          "logprob": -0.20825195,
+          "special": false,
+          "text": " upon"
+        },
+        {
+          "id": 264,
+          "logprob": -0.0017786026,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 727,
+          "logprob": -0.011955261,
+          "special": false,
+          "text": " time"
+        },
+        {
+          "id": 28725,
+          "logprob": -0.17297363,
+          "special": false,
+          "text": ","
+        },
+        {
+          "id": 736,
+          "logprob": -0.91015625,
+          "special": false,
+          "text": " there"
+        },
+        {
+          "id": 403,
+          "logprob": -0.05758667,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 264,
+          "logprob": -0.009544373,
+          "special": false,
+          "text": " a"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n\nOnce upon a time, there was a"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_simple.json b/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_simple.json
new file mode 100644
index 00000000..f0f2ee9e
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_llava_next/test_flash_llava_next_simple.json
@@ -0,0 +1,73 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 13,
+        "logprob": -0.00756073,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 13,
+        "logprob": -0.20117188,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 16114,
+        "logprob": -1.2597656,
+        "special": false,
+        "text": "Once"
+      },
+      {
+        "id": 3714,
+        "logprob": -0.20825195,
+        "special": false,
+        "text": " upon"
+      },
+      {
+        "id": 264,
+        "logprob": -0.00178051,
+        "special": false,
+        "text": " a"
+      },
+      {
+        "id": 727,
+        "logprob": -0.011955261,
+        "special": false,
+        "text": " time"
+      },
+      {
+        "id": 28725,
+        "logprob": -0.17541504,
+        "special": false,
+        "text": ","
+      },
+      {
+        "id": 736,
+        "logprob": -0.91308594,
+        "special": false,
+        "text": " there"
+      },
+      {
+        "id": 403,
+        "logprob": -0.058410645,
+        "special": false,
+        "text": " was"
+      },
+      {
+        "id": 264,
+        "logprob": -0.009689331,
+        "special": false,
+        "text": " a"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "\n\nOnce upon a time, there was a"
+}
diff --git a/integration-tests/models/__snapshots__/test_mamba/test_mamba.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba.json
new file mode 100644
index 00000000..eaba5078
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_mamba/test_mamba.json
@@ -0,0 +1,73 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [],
+    "seed": null,
+    "tokens": [
+      {
+        "id": 187,
+        "logprob": -0.37890625,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 187,
+        "logprob": -0.26953125,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 30763,
+        "logprob": -1.1953125,
+        "special": false,
+        "text": "Deep"
+      },
+      {
+        "id": 4715,
+        "logprob": -0.53515625,
+        "special": false,
+        "text": " learning"
+      },
+      {
+        "id": 310,
+        "logprob": -0.625,
+        "special": false,
+        "text": " is"
+      },
+      {
+        "id": 247,
+        "logprob": -0.6796875,
+        "special": false,
+        "text": " a"
+      },
+      {
+        "id": 747,
+        "logprob": -2.0,
+        "special": false,
+        "text": " new"
+      },
+      {
+        "id": 1511,
+        "logprob": -2.3125,
+        "special": false,
+        "text": " type"
+      },
+      {
+        "id": 273,
+        "logprob": -0.0028533936,
+        "special": false,
+        "text": " of"
+      },
+      {
+        "id": 5145,
+        "logprob": -1.265625,
+        "special": false,
+        "text": " machine"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "\n\nDeep learning is a new type of machine"
+}
diff --git a/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json
new file mode 100644
index 00000000..85e9a9e0
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json
@@ -0,0 +1,99 @@
+{
+  "details": {
+    "best_of_sequences": null,
+    "finish_reason": "length",
+    "generated_tokens": 10,
+    "prefill": [
+      {
+        "id": 2502,
+        "logprob": null,
+        "text": " red"
+      },
+      {
+        "id": 13,
+        "logprob": -2.734375,
+        "text": ","
+      },
+      {
+        "id": 8862,
+        "logprob": -3.6875,
+        "text": " yellow"
+      },
+      {
+        "id": 13,
+        "logprob": -0.40234375,
+        "text": ","
+      },
+      {
+        "id": 209,
+        "logprob": -8.25,
+        "text": " "
+      }
+    ],
+    "seed": 0,
+    "tokens": [
+      {
+        "id": 187,
+        "logprob": 0.0,
+        "special": false,
+        "text": "\n"
+      },
+      {
+        "id": 395,
+        "logprob": -0.3125,
+        "special": false,
+        "text": "and"
+      },
+      {
+        "id": 4797,
+        "logprob": 0.0,
+        "special": false,
+        "text": " blue"
+      },
+      {
+        "id": 9830,
+        "logprob": -1.65625,
+        "special": false,
+        "text": " colors"
+      },
+      {
+        "id": 15,
+        "logprob": 0.0,
+        "special": false,
+        "text": "."
+      },
+      {
+        "id": 329,
+        "logprob": -2.4375,
+        "special": false,
+        "text": " A"
+      },
+      {
+        "id": 1180,
+        "logprob": -1.953125,
+        "special": false,
+        "text": " number"
+      },
+      {
+        "id": 273,
+        "logprob": 0.0,
+        "special": false,
+        "text": " of"
+      },
+      {
+        "id": 1027,
+        "logprob": -1.5546875,
+        "special": false,
+        "text": " different"
+      },
+      {
+        "id": 3295,
+        "logprob": -0.97265625,
+        "special": false,
+        "text": " color"
+      }
+    ],
+    "top_tokens": null
+  },
+  "generated_text": "blue, red, yellow, \nand blue colors. A number of different color"
+}
diff --git a/integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json
new file mode 100644
index 00000000..4921c14b
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json
@@ -0,0 +1,398 @@
+[
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1276,
+          "logprob": null,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -0.83984375,
+          "text": " is"
+        },
+        {
+          "id": 18147,
+          "logprob": -12.8125,
+          "text": " Deep"
+        },
+        {
+          "id": 20727,
+          "logprob": -2.84375,
+          "text": " Learning"
+        },
+        {
+          "id": 32,
+          "logprob": -1.25,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 187,
+          "logprob": -0.37890625,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 187,
+          "logprob": -0.4296875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 30763,
+          "logprob": -1.078125,
+          "special": false,
+          "text": "Deep"
+        },
+        {
+          "id": 4715,
+          "logprob": -0.515625,
+          "special": false,
+          "text": " learning"
+        },
+        {
+          "id": 310,
+          "logprob": -0.6015625,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -0.65625,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 747,
+          "logprob": -2.109375,
+          "special": false,
+          "text": " new"
+        },
+        {
+          "id": 1511,
+          "logprob": -2.328125,
+          "special": false,
+          "text": " type"
+        },
+        {
+          "id": 273,
+          "logprob": -0.0032653809,
+          "special": false,
+          "text": " of"
+        },
+        {
+          "id": 5145,
+          "logprob": -1.28125,
+          "special": false,
+          "text": " machine"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n\nDeep learning is a new type of machine"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1276,
+          "logprob": null,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -0.80078125,
+          "text": " is"
+        },
+        {
+          "id": 18147,
+          "logprob": -13.25,
+          "text": " Deep"
+        },
+        {
+          "id": 20727,
+          "logprob": -2.828125,
+          "text": " Learning"
+        },
+        {
+          "id": 32,
+          "logprob": -1.1953125,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 187,
+          "logprob": -0.296875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 187,
+          "logprob": -0.3359375,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 30763,
+          "logprob": -1.2578125,
+          "special": false,
+          "text": "Deep"
+        },
+        {
+          "id": 4715,
+          "logprob": -0.5546875,
+          "special": false,
+          "text": " learning"
+        },
+        {
+          "id": 310,
+          "logprob": -0.62890625,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -0.64453125,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 747,
+          "logprob": -2.078125,
+          "special": false,
+          "text": " new"
+        },
+        {
+          "id": 1511,
+          "logprob": -2.28125,
+          "special": false,
+          "text": " type"
+        },
+        {
+          "id": 273,
+          "logprob": -0.0030670166,
+          "special": false,
+          "text": " of"
+        },
+        {
+          "id": 5145,
+          "logprob": -1.3125,
+          "special": false,
+          "text": " machine"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n\nDeep learning is a new type of machine"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1276,
+          "logprob": null,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -0.80078125,
+          "text": " is"
+        },
+        {
+          "id": 18147,
+          "logprob": -13.25,
+          "text": " Deep"
+        },
+        {
+          "id": 20727,
+          "logprob": -2.828125,
+          "text": " Learning"
+        },
+        {
+          "id": 32,
+          "logprob": -1.1953125,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 187,
+          "logprob": -0.296875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 187,
+          "logprob": -0.3359375,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 30763,
+          "logprob": -1.2578125,
+          "special": false,
+          "text": "Deep"
+        },
+        {
+          "id": 4715,
+          "logprob": -0.5546875,
+          "special": false,
+          "text": " learning"
+        },
+        {
+          "id": 310,
+          "logprob": -0.62890625,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -0.64453125,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 747,
+          "logprob": -2.078125,
+          "special": false,
+          "text": " new"
+        },
+        {
+          "id": 1511,
+          "logprob": -2.28125,
+          "special": false,
+          "text": " type"
+        },
+        {
+          "id": 273,
+          "logprob": -0.0030670166,
+          "special": false,
+          "text": " of"
+        },
+        {
+          "id": 5145,
+          "logprob": -1.3125,
+          "special": false,
+          "text": " machine"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n\nDeep learning is a new type of machine"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 1276,
+          "logprob": null,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -0.80078125,
+          "text": " is"
+        },
+        {
+          "id": 18147,
+          "logprob": -13.25,
+          "text": " Deep"
+        },
+        {
+          "id": 20727,
+          "logprob": -2.828125,
+          "text": " Learning"
+        },
+        {
+          "id": 32,
+          "logprob": -1.1953125,
+          "text": "?"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 187,
+          "logprob": -0.296875,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 187,
+          "logprob": -0.3359375,
+          "special": false,
+          "text": "\n"
+        },
+        {
+          "id": 30763,
+          "logprob": -1.2578125,
+          "special": false,
+          "text": "Deep"
+        },
+        {
+          "id": 4715,
+          "logprob": -0.5546875,
+          "special": false,
+          "text": " learning"
+        },
+        {
+          "id": 310,
+          "logprob": -0.62890625,
+          "special": false,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -0.64453125,
+          "special": false,
+          "text": " a"
+        },
+        {
+          "id": 747,
+          "logprob": -2.078125,
+          "special": false,
+          "text": " new"
+        },
+        {
+          "id": 1511,
+          "logprob": -2.28125,
+          "special": false,
+          "text": " type"
+        },
+        {
+          "id": 273,
+          "logprob": -0.0030670166,
+          "special": false,
+          "text": " of"
+        },
+        {
+          "id": 5145,
+          "logprob": -1.3125,
+          "special": false,
+          "text": " machine"
+        }
+      ],
+      "top_tokens": null
+    },
+    "generated_text": "\n\nDeep learning is a new type of machine"
+  }
+]
diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json
index 024823d0..5cacf3e9 100644
--- a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json
+++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json
@@ -1,8 +1,8 @@
 {
   "details": {
     "best_of_sequences": null,
-    "finish_reason": "eos_token",
-    "generated_tokens": 9,
+    "finish_reason": "length",
+    "generated_tokens": 10,
     "prefill": [
       {
         "id": 0,
@@ -14,7 +14,7 @@
     "tokens": [
       {
         "id": 16017,
-        "logprob": -0.30908203,
+        "logprob": 0.0,
         "special": false,
         "text": " blue"
       },
@@ -26,39 +26,45 @@
       },
       {
         "id": 259,
-        "logprob": -0.28271484,
+        "logprob": -0.4716797,
         "special": false,
         "text": " "
       },
       {
-        "id": 15484,
-        "logprob": -1.7929688,
+        "id": 261,
+        "logprob": -0.044677734,
         "special": false,
-        "text": "appear"
+        "text": ","
       },
       {
-        "id": 345,
-        "logprob": -0.8935547,
+        "id": 35622,
+        "logprob": -0.79589844,
         "special": false,
-        "text": "ed"
+        "text": " cloud"
       },
       {
-        "id": 281,
+        "id": 263,
+        "logprob": -1.2958984,
+        "special": false,
+        "text": "s"
+      },
+      {
+        "id": 305,
         "logprob": 0.0,
         "special": false,
-        "text": " in"
+        "text": " and"
       },
       {
-        "id": 287,
+        "id": 35622,
+        "logprob": -1.1630859,
+        "special": false,
+        "text": " cloud"
+      },
+      {
+        "id": 263,
         "logprob": 0.0,
         "special": false,
-        "text": " the"
-      },
-      {
-        "id": 20495,
-        "logprob": -0.32299805,
-        "special": false,
-        "text": " sky"
+        "text": "s"
       },
       {
         "id": 1,
@@ -66,7 +72,8 @@
         "special": true,
         "text": "</s>"
       }
-    ]
+    ],
+    "top_tokens": null
   },
-  "generated_text": "Why is the sky blue?blue sky appeared in the sky"
+  "generated_text": "Why is the sky blue?blue sky, clouds and clouds"
 }
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_no_tools.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_no_tools.json
new file mode 100644
index 00000000..153a508d
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_no_tools.json
@@ -0,0 +1,26 @@
+{
+  "choices": [
+    {
+      "finish_reason": "length",
+      "index": 0,
+      "logprobs": null,
+      "message": {
+        "content": "As of today, there is a Update available for the Brooklyn, New York, area. According to the latest forecast, it's warm with high temperatures throughout the day. It's forecasted at 75°F for today and 77°F for tomorrow. However, in autumn, the weather typically changes drastically, becoming cooler and wetter. You can find the current weather forecast for the area through your local weather service. Additionally",
+        "name": null,
+        "role": "assistant",
+        "tool_calls": null
+      },
+      "usage": null
+    }
+  ],
+  "created": 1710795556,
+  "id": "",
+  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "object": "text_completion",
+  "system_fingerprint": "2.0.0-native",
+  "usage": {
+    "completion_tokens": 100,
+    "prompt_tokens": 60,
+    "total_tokens": 160
+  }
+}
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json
new file mode 100644
index 00000000..56920b3e
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools.json
@@ -0,0 +1,40 @@
+{
+  "choices": [
+    {
+      "finish_reason": "eos_token",
+      "index": 0,
+      "logprobs": null,
+      "message": {
+        "content": null,
+        "name": null,
+        "role": "assistant",
+        "tool_calls": [
+          {
+            "function": {
+              "description": null,
+              "name": "tools",
+              "parameters": {
+                "format": "celsius",
+                "location": "New York, NY",
+                "num_days": 14
+              }
+            },
+            "id": 0,
+            "type": "function"
+          }
+        ]
+      },
+      "usage": null
+    }
+  ],
+  "created": 1710795556,
+  "id": "",
+  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "object": "text_completion",
+  "system_fingerprint": "2.0.0-native",
+  "usage": {
+    "completion_tokens": 29,
+    "prompt_tokens": 316,
+    "total_tokens": 345
+  }
+}
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json
new file mode 100644
index 00000000..fe679362
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_auto.json
@@ -0,0 +1,40 @@
+{
+  "choices": [
+    {
+      "finish_reason": "eos_token",
+      "index": 0,
+      "logprobs": null,
+      "message": {
+        "content": null,
+        "name": null,
+        "role": "assistant",
+        "tool_calls": [
+          {
+            "function": {
+              "description": null,
+              "name": "tools",
+              "parameters": {
+                "format": "celsius",
+                "location": "New York, NY",
+                "num_days": 14
+              }
+            },
+            "id": 0,
+            "type": "function"
+          }
+        ]
+      },
+      "usage": null
+    }
+  ],
+  "created": 1710795557,
+  "id": "",
+  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "object": "text_completion",
+  "system_fingerprint": "2.0.0-native",
+  "usage": {
+    "completion_tokens": 29,
+    "prompt_tokens": 316,
+    "total_tokens": 345
+  }
+}
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json
new file mode 100644
index 00000000..e48a1e7d
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_choice.json
@@ -0,0 +1,39 @@
+{
+  "choices": [
+    {
+      "finish_reason": "eos_token",
+      "index": 0,
+      "logprobs": null,
+      "message": {
+        "content": null,
+        "name": null,
+        "role": "assistant",
+        "tool_calls": [
+          {
+            "function": {
+              "description": null,
+              "name": "tools",
+              "parameters": {
+                "format": "celsius",
+                "location": "New York, NY"
+              }
+            },
+            "id": 0,
+            "type": "function"
+          }
+        ]
+      },
+      "usage": null
+    }
+  ],
+  "created": 1710795557,
+  "id": "",
+  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "object": "text_completion",
+  "system_fingerprint": "2.0.0-native",
+  "usage": {
+    "completion_tokens": 21,
+    "prompt_tokens": 187,
+    "total_tokens": 208
+  }
+}
diff --git a/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json
new file mode 100644
index 00000000..cfebc05f
--- /dev/null
+++ b/integration-tests/models/__snapshots__/test_tools_llama/test_flash_llama_grammar_tools_stream.json
@@ -0,0 +1,27 @@
+{
+  "choices": [
+    {
+      "delta": {
+        "content": null,
+        "role": "assistant",
+        "tool_calls": {
+          "function": {
+            "arguments": "</s>",
+            "name": null
+          },
+          "id": "",
+          "index": 0,
+          "type": "function"
+        }
+      },
+      "finish_reason": "eos_token",
+      "index": 0,
+      "logprobs": null
+    }
+  ],
+  "created": 1710795499,
+  "id": "",
+  "model": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "object": "text_completion",
+  "system_fingerprint": "2.0.0-native"
+}
diff --git a/integration-tests/models/test_flash_awq.py b/integration-tests/models/test_flash_awq.py
index 62a95f48..ead918c3 100644
--- a/integration-tests/models/test_flash_awq.py
+++ b/integration-tests/models/test_flash_awq.py
@@ -18,7 +18,6 @@ async def flash_llama_awq(flash_llama_awq_handle):
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
     response = await flash_llama_awq.generate(
         "What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
@@ -33,7 +32,6 @@ async def test_flash_llama_awq(flash_llama_awq, response_snapshot):
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
     response = await flash_llama_awq.generate(
         "What is Deep Learning?",
@@ -55,7 +53,6 @@ async def test_flash_llama_awq_all_params(flash_llama_awq, response_snapshot):
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_llama_awq_load(flash_llama_awq, generate_load, response_snapshot):
     responses = await generate_load(
         flash_llama_awq, "What is Deep Learning?", max_new_tokens=10, n=4
diff --git a/integration-tests/models/test_flash_awq_sharded.py b/integration-tests/models/test_flash_awq_sharded.py
index 1c687fc9..a83614ac 100644
--- a/integration-tests/models/test_flash_awq_sharded.py
+++ b/integration-tests/models/test_flash_awq_sharded.py
@@ -18,7 +18,6 @@ async def flash_llama_awq_sharded(flash_llama_awq_handle_sharded):
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapshot):
     response = await flash_llama_awq_sharded.generate(
         "What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
@@ -33,7 +32,6 @@ async def test_flash_llama_awq_sharded(flash_llama_awq_sharded, response_snapsho
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_llama_awq_load_sharded(
     flash_llama_awq_sharded, generate_load, response_snapshot
 ):
diff --git a/integration-tests/models/test_flash_gemma.py b/integration-tests/models/test_flash_gemma.py
new file mode 100644
index 00000000..2822b5e2
--- /dev/null
+++ b/integration-tests/models/test_flash_gemma.py
@@ -0,0 +1,61 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_gemma_handle(launcher):
+    with launcher("gg-hf/gemma-2b", num_shard=1) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_gemma(flash_gemma_handle):
+    await flash_gemma_handle.health(300)
+    return flash_gemma_handle.client
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_gemma(flash_gemma, response_snapshot):
+    response = await flash_gemma.generate(
+        "Test request", max_new_tokens=10, decoder_input_details=True
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response == response_snapshot
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_gemma_all_params(flash_gemma, response_snapshot):
+    response = await flash_gemma.generate(
+        "Test request",
+        max_new_tokens=10,
+        repetition_penalty=1.2,
+        return_full_text=True,
+        stop_sequences=["test"],
+        temperature=0.5,
+        top_p=0.9,
+        top_k=10,
+        truncate=5,
+        typical_p=0.9,
+        watermark=True,
+        decoder_input_details=True,
+        seed=0,
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response == response_snapshot
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_gemma_load(flash_gemma, generate_load, response_snapshot):
+    responses = await generate_load(flash_gemma, "Test request", max_new_tokens=10, n=4)
+
+    assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
+
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_grammar_llama.py b/integration-tests/models/test_flash_grammar_llama.py
new file mode 100644
index 00000000..ce1cf787
--- /dev/null
+++ b/integration-tests/models/test_flash_grammar_llama.py
@@ -0,0 +1,150 @@
+import pytest
+import json
+
+from text_generation.types import GrammarType
+
+
+@pytest.fixture(scope="module")
+def flash_llama_grammar_handle(launcher):
+    with launcher(
+        "TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, disable_grammar_support=False
+    ) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_llama_grammar(flash_llama_grammar_handle):
+    await flash_llama_grammar_handle.health(300)
+    return flash_llama_grammar_handle.client
+
+
+@pytest.mark.asyncio
+async def test_flash_llama_grammar(flash_llama_grammar, response_snapshot):
+    response = await flash_llama_grammar.generate(
+        "Test request", max_new_tokens=10, decoder_input_details=True
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response == response_snapshot
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+async def test_flash_llama_grammar_regex(flash_llama_grammar, response_snapshot):
+    response = await flash_llama_grammar.generate(
+        "Whats Googles DNS",
+        max_new_tokens=10,
+        decoder_input_details=True,
+        seed=0,
+        grammar={
+            "type": GrammarType.Regex,  # "regex"
+            "value": "((25[0-5]|2[0-4]\\d|[01]?\\d\\d?)\\.){3}(25[0-5]|2[0-4]\\d|[01]?\\d\\d?)",
+        },
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response.generated_text == "42.1.1.101"
+    assert response == response_snapshot
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+async def test_flash_llama_grammar_json(flash_llama_grammar, response_snapshot):
+    response = await flash_llama_grammar.generate(
+        "info: david holtz like trees and has two cats. ",
+        max_new_tokens=100,
+        decoder_input_details=True,
+        seed=0,
+        grammar={
+            "type": GrammarType.Json,  # "json"
+            "value": json.dumps(
+                {
+                    "type": "object",
+                    "$id": "https://example.com/person.schema.json",
+                    "$schema": "https://json-schema.org/draft/2020-12/schema",
+                    "title": "Person",
+                    "properties": {
+                        "firstName": {
+                            "type": "string",
+                            "description": "The person'''s first name.",
+                        },
+                        "lastName": {
+                            "type": "string",
+                            "description": "The person'''s last name.",
+                        },
+                        "hobby": {
+                            "description": "The person'''s hobby.",
+                            "type": "string",
+                        },
+                        "numCats": {
+                            "description": "The number of cats the person has.",
+                            "type": "integer",
+                            "minimum": 0,
+                        },
+                    },
+                    "required": ["firstName", "lastName", "hobby", "numCats"],
+                }
+            ),
+        },
+    )
+
+    assert response.details.generated_tokens == 30
+    assert (
+        response.generated_text
+        == '{"firstName":"David","hobby":"Trees","lastName":"Holtz","numCats":2}'
+    )
+    assert response == response_snapshot
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+async def test_flash_llama_grammar_load(
+    flash_llama_grammar, generate_load, response_snapshot
+):
+    responses = await generate_load(
+        flash_llama_grammar,
+        "name: david. email:  ",
+        max_new_tokens=10,
+        n=4,
+        stop_sequences=[".com"],
+        seed=0,
+        grammar={
+            "type": GrammarType.Regex,  # "regex"
+            "value": "[\\w-]+@([\\w-]+\\.)+[\\w-]+",  # email regex
+        },
+    )
+
+    assert len(responses) == 4
+
+    expected = "123456@gmail.com"
+
+    for response in responses:
+        assert response.generated_text == expected
+
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
+
+    assert responses == response_snapshot
+
+
+# this is the same as the above test, but only fires off a single request
+# this is only to ensure that the parallel and single inference produce the same result
+@pytest.mark.skip
+@pytest.mark.asyncio
+async def test_flash_llama_grammar_single_load_instance(
+    flash_llama_grammar, generate_load, response_snapshot
+):
+    response = await flash_llama_grammar.generate(
+        "name: david. email:  ",
+        max_new_tokens=10,
+        stop_sequences=[".com"],
+        seed=0,
+        grammar={
+            "type": GrammarType.Regex,  # "regex"
+            "value": "[\\w-]+@([\\w-]+\\.)+[\\w-]+",  # email regex
+        },
+    )
+
+    # assert response.details.generated_tokens == 30
+    assert response.generated_text == "123456@gmail.com"
+
+    assert response == response_snapshot
diff --git a/integration-tests/models/test_flash_medusa.py b/integration-tests/models/test_flash_medusa.py
new file mode 100644
index 00000000..27db5665
--- /dev/null
+++ b/integration-tests/models/test_flash_medusa.py
@@ -0,0 +1,64 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_medusa_handle(launcher):
+    with launcher(
+        "FasterDecoding/medusa-vicuna-7b-v1.3", num_shard=2, revision="refs/pr/1"
+    ) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_medusa(flash_medusa_handle):
+    await flash_medusa_handle.health(300)
+    return flash_medusa_handle.client
+
+
+@pytest.mark.asyncio
+async def test_flash_medusa_simple(flash_medusa, response_snapshot):
+    response = await flash_medusa.generate(
+        "What is Deep Learning?", max_new_tokens=10, decoder_input_details=True
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+async def test_flash_medusa_all_params(flash_medusa, response_snapshot):
+    response = await flash_medusa.generate(
+        "What is Deep Learning?",
+        max_new_tokens=10,
+        repetition_penalty=1.2,
+        return_full_text=True,
+        stop_sequences=["test"],
+        temperature=0.5,
+        top_p=0.9,
+        top_k=10,
+        truncate=5,
+        typical_p=0.9,
+        watermark=True,
+        decoder_input_details=True,
+        seed=0,
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+async def test_flash_medusa_load(flash_medusa, generate_load, response_snapshot):
+    responses = await generate_load(
+        flash_medusa, "What is Deep Learning?", max_new_tokens=10, n=4
+    )
+
+    assert len(responses) == 4
+    assert all(
+        [r.generated_text == responses[0].generated_text for r in responses]
+    ), f"{[r.generated_text for r in responses]}"
+    assert (
+        responses[0].generated_text == "\nDeep learning is a subset of machine learning"
+    )
+
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_mistral.py b/integration-tests/models/test_flash_mistral.py
index 63cb09b5..52b51928 100644
--- a/integration-tests/models/test_flash_mistral.py
+++ b/integration-tests/models/test_flash_mistral.py
@@ -14,18 +14,17 @@ async def flash_mistral(flash_mistral_handle):
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_mistral(flash_mistral, response_snapshot):
     response = await flash_mistral.generate(
         "Test request", max_new_tokens=10, decoder_input_details=True
     )
 
     assert response.details.generated_tokens == 10
+    assert response.generated_text == ": Let n = 10 - 1"
     assert response == response_snapshot
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_mistral_all_params(flash_mistral, response_snapshot):
     response = await flash_mistral.generate(
         "Test request",
@@ -48,13 +47,15 @@ async def test_flash_mistral_all_params(flash_mistral, response_snapshot):
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_mistral_load(flash_mistral, generate_load, response_snapshot):
     responses = await generate_load(
         flash_mistral, "Test request", max_new_tokens=10, n=4
     )
 
     assert len(responses) == 4
-    assert all([r.generated_text == responses[0].generated_text for r in responses])
+    assert all(
+        [r.generated_text == responses[0].generated_text for r in responses]
+    ), f"{[r.generated_text  for r in responses]}"
+    assert responses[0].generated_text == ": Let n = 10 - 1"
 
     assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_phi.py b/integration-tests/models/test_flash_phi.py
new file mode 100644
index 00000000..9d6ca566
--- /dev/null
+++ b/integration-tests/models/test_flash_phi.py
@@ -0,0 +1,60 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_phi_handle(launcher):
+    with launcher("microsoft/phi-2", num_shard=1) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_phi(flash_phi_handle):
+    await flash_phi_handle.health(300)
+    return flash_phi_handle.client
+
+
+@pytest.mark.asyncio
+async def test_flash_phi(flash_phi, response_snapshot):
+    response = await flash_phi.generate(
+        "Test request", max_new_tokens=10, decoder_input_details=True
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response.generated_text == ': {request}")\n        response = self'
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+async def test_flash_phi_all_params(flash_phi, response_snapshot):
+    response = await flash_phi.generate(
+        "Test request",
+        max_new_tokens=10,
+        repetition_penalty=1.2,
+        return_full_text=True,
+        stop_sequences=["network"],
+        temperature=0.5,
+        top_p=0.9,
+        top_k=10,
+        truncate=5,
+        typical_p=0.9,
+        watermark=True,
+        decoder_input_details=True,
+        seed=0,
+    )
+
+    assert response.details.generated_tokens == 6
+    assert response.generated_text == "Test request to send data over a network"
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+async def test_flash_phi_load(flash_phi, generate_load, response_snapshot):
+    responses = await generate_load(flash_phi, "Test request", max_new_tokens=10, n=4)
+
+    assert len(responses) == 4
+    assert all(
+        [r.generated_text == responses[0].generated_text for r in responses]
+    ), f"{[r.generated_text  for r in responses]}"
+    assert responses[0].generated_text == ': {request}")\n        response = self'
+
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_qwen2.py b/integration-tests/models/test_flash_qwen2.py
new file mode 100644
index 00000000..2963aeb4
--- /dev/null
+++ b/integration-tests/models/test_flash_qwen2.py
@@ -0,0 +1,59 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_qwen2_handle(launcher):
+    with launcher("Qwen/Qwen1.5-0.5B") as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_qwen2(flash_qwen2_handle):
+    await flash_qwen2_handle.health(300)
+    return flash_qwen2_handle.client
+
+
+@pytest.mark.asyncio
+async def test_flash_qwen2(flash_qwen2, response_snapshot):
+    response = await flash_qwen2.generate(
+        "Test request", max_new_tokens=10, decoder_input_details=True
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response.generated_text == "\n# Create a request\nrequest = requests.get"
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+async def test_flash_qwen2_all_params(flash_qwen2, response_snapshot):
+    response = await flash_qwen2.generate(
+        "Test request",
+        max_new_tokens=10,
+        repetition_penalty=1.2,
+        return_full_text=True,
+        stop_sequences=["test"],
+        temperature=0.5,
+        top_p=0.9,
+        top_k=10,
+        truncate=5,
+        typical_p=0.9,
+        watermark=True,
+        decoder_input_details=True,
+        seed=0,
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+async def test_flash_qwen2_load(flash_qwen2, generate_load, response_snapshot):
+    responses = await generate_load(flash_qwen2, "Test request", max_new_tokens=10, n=4)
+
+    assert len(responses) == 4
+    assert all(
+        [r.generated_text == responses[0].generated_text for r in responses]
+    ), f"{[r.generated_text  for r in responses]}"
+    assert responses[0].generated_text == "\n# Create a request\nrequest = requests.get"
+
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_starcoder2.py b/integration-tests/models/test_flash_starcoder2.py
new file mode 100644
index 00000000..ea665b6c
--- /dev/null
+++ b/integration-tests/models/test_flash_starcoder2.py
@@ -0,0 +1,55 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def flash_starcoder2_handle(launcher):
+    with launcher("bigcode/starcoder2-3b", num_shard=2) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_starcoder2(flash_starcoder2_handle):
+    await flash_starcoder2_handle.health(300)
+    return flash_starcoder2_handle.client
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_starcoder2(flash_starcoder2, response_snapshot):
+    response = await flash_starcoder2.generate(
+        "def print_hello", max_new_tokens=10, decoder_input_details=True
+    )
+
+    assert response.details.generated_tokens == 10
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_starcoder2_default_params(flash_starcoder2, response_snapshot):
+    response = await flash_starcoder2.generate(
+        "def print_hello",
+        max_new_tokens=60,
+        temperature=0.2,
+        top_p=0.95,
+        decoder_input_details=True,
+        seed=0,
+    )
+
+    assert response.details.generated_tokens == 60
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_starcoder2_load(
+    flash_starcoder2, generate_load, response_snapshot
+):
+    responses = await generate_load(
+        flash_starcoder2, "def print_hello", max_new_tokens=10, n=4
+    )
+
+    assert len(responses) == 4
+    assert all([r.generated_text == responses[0].generated_text for r in responses])
+
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_flash_starcoder_gptq.py b/integration-tests/models/test_flash_starcoder_gptq.py
index 5e448d55..329158b7 100644
--- a/integration-tests/models/test_flash_starcoder_gptq.py
+++ b/integration-tests/models/test_flash_starcoder_gptq.py
@@ -14,7 +14,6 @@ async def flash_starcoder_gptq(flash_starcoder_gptq_handle):
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_starcoder_gptq(flash_starcoder_gptq, generous_response_snapshot):
     response = await flash_starcoder_gptq.generate(
         "def geometric_mean(L: List[float]):",
@@ -26,7 +25,6 @@ async def test_flash_starcoder_gptq(flash_starcoder_gptq, generous_response_snap
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_starcoder_gptq_default_params(
     flash_starcoder_gptq, generous_response_snapshot
 ):
@@ -43,7 +41,6 @@ async def test_flash_starcoder_gptq_default_params(
 
 
 @pytest.mark.asyncio
-@pytest.mark.private
 async def test_flash_starcoder_gptq_load(
     flash_starcoder_gptq, generate_load, generous_response_snapshot
 ):
diff --git a/integration-tests/models/test_grammar_llama.py b/integration-tests/models/test_grammar_llama.py
new file mode 100644
index 00000000..ce5da8a9
--- /dev/null
+++ b/integration-tests/models/test_grammar_llama.py
@@ -0,0 +1,70 @@
+import pytest
+import json
+
+from text_generation.types import GrammarType
+
+
+@pytest.fixture(scope="module")
+def non_flash_llama_grammar_handle(launcher):
+    with launcher(
+        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        num_shard=1,
+        disable_grammar_support=False,
+        use_flash_attention=False,
+    ) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def non_flash_llama_grammar(non_flash_llama_grammar_handle):
+    await non_flash_llama_grammar_handle.health(300)
+    return non_flash_llama_grammar_handle.client
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+async def test_non_flash_llama_grammar_json(non_flash_llama_grammar, response_snapshot):
+    response = await non_flash_llama_grammar.generate(
+        "info: david holtz like trees and has two cats. ",
+        max_new_tokens=100,
+        decoder_input_details=True,
+        seed=0,
+        grammar={
+            "type": GrammarType.Json,
+            "value": json.dumps(
+                {
+                    "type": "object",
+                    "$id": "https://example.com/person.schema.json",
+                    "$schema": "https://json-schema.org/draft/2020-12/schema",
+                    "title": "Person",
+                    "properties": {
+                        "firstName": {
+                            "type": "string",
+                            "description": "The person'''s first name.",
+                        },
+                        "lastName": {
+                            "type": "string",
+                            "description": "The person'''s last name.",
+                        },
+                        "hobby": {
+                            "description": "The person'''s hobby.",
+                            "type": "string",
+                        },
+                        "numCats": {
+                            "description": "The number of cats the person has.",
+                            "type": "integer",
+                            "minimum": 0,
+                        },
+                    },
+                    "required": ["firstName", "lastName", "hobby", "numCats"],
+                }
+            ),
+        },
+    )
+
+    assert response.details.generated_tokens == 30
+    assert (
+        response.generated_text
+        == '{"firstName":"David","hobby":"Trees","lastName":"Holtz","numCats":2}'
+    )
+    assert response == response_snapshot
diff --git a/integration-tests/models/test_idefics.py b/integration-tests/models/test_idefics.py
index 5a81a4f0..aeeaffa1 100644
--- a/integration-tests/models/test_idefics.py
+++ b/integration-tests/models/test_idefics.py
@@ -1,9 +1,12 @@
 import pytest
+import base64
 
 
 @pytest.fixture(scope="module")
 def idefics_handle(launcher):
-    with launcher("HuggingFaceM4/idefics-9b-instruct", num_shard=2, dtype="float16") as handle:
+    with launcher(
+        "HuggingFaceM4/idefics-9b-instruct", num_shard=2, dtype="float16"
+    ) as handle:
         yield handle
 
 
@@ -13,29 +16,44 @@ async def idefics(idefics_handle):
     return idefics_handle.client
 
 
+# TODO fix the server parser to count inline image tokens correctly
+def get_chicken():
+    with open("integration-tests/images/chicken_on_money.png", "rb") as image_file:
+        encoded_string = base64.b64encode(image_file.read())
+    return f"data:image/png;base64,{encoded_string.decode('utf-8')}"
+
+
 @pytest.mark.asyncio
 async def test_idefics(idefics, response_snapshot):
+    chicken = get_chicken()
     response = await idefics.generate(
-        "User:![](https://temp-5681.s3.us-west-2.amazonaws.com/chicken_on_money.png)Can you tell me a very short story based on the image?",
+        f"User:![]({chicken})Can you tell me a very short story based on the image?",
         max_new_tokens=10,
         decoder_input_details=True,
     )
 
     assert response.details.generated_tokens == 10
+    assert (
+        response.generated_text == " \nAssistant: A rooster stands"
+    ), f"{repr(response.generated_text)}"
     assert response == response_snapshot
 
 
 @pytest.mark.asyncio
 async def test_idefics_load(idefics, generate_load, response_snapshot):
+    chicken = get_chicken()
     responses = await generate_load(
         idefics,
-        "User:![](https://temp-5681.s3.us-west-2.amazonaws.com/chicken_on_money.png)Can you tell me a very short story based on the image?",
+        f"User:![]({chicken})Can you tell me a very short story based on the image?",
         max_new_tokens=10,
         n=4,
     )
 
     generated_texts = [r.generated_text for r in responses]
 
+    assert (
+        generated_texts[0] == " \nAssistant: A rooster stands"
+    ), f"{generated_texts[0]}"
     assert len(generated_texts) == 4
     assert generated_texts, all(
         [text == generated_texts[0] for text in generated_texts]
diff --git a/integration-tests/models/test_llava_next.py b/integration-tests/models/test_llava_next.py
new file mode 100644
index 00000000..f5b290b1
--- /dev/null
+++ b/integration-tests/models/test_llava_next.py
@@ -0,0 +1,84 @@
+import pytest
+import base64
+
+
+# TODO fix the server parser to count inline image tokens correctly
+def get_chicken():
+    with open("integration-tests/images/chicken_on_money.png", "rb") as image_file:
+        encoded_string = base64.b64encode(image_file.read())
+    return f"data:image/png;base64,{encoded_string.decode('utf-8')}"
+
+
+@pytest.fixture(scope="module")
+def flash_llava_next_handle(launcher):
+    with launcher(
+        "llava-hf/llava-v1.6-mistral-7b-hf",
+        num_shard=4,
+        max_input_length=4000,
+        max_total_tokens=4096,
+    ) as handle:
+        yield handle
+
+
+@pytest.fixture(scope="module")
+async def flash_llava_next(flash_llava_next_handle):
+    await flash_llava_next_handle.health(300)
+    return flash_llava_next_handle.client
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llava_next_simple(flash_llava_next, response_snapshot):
+    chicken = get_chicken()
+    response = await flash_llava_next.generate(
+        f"User:![]({chicken})Can you tell me a very short story based on the image?",
+        max_new_tokens=10,
+    )
+    assert (
+        response.generated_text == "\n\nOnce upon a time, there was a"
+    ), f"{repr(response.generated_text)}"
+    assert response.details.generated_tokens == 10
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llava_next_all_params(flash_llava_next, response_snapshot):
+    response = await flash_llava_next.generate(
+        "Test request",
+        max_new_tokens=10,
+        repetition_penalty=1.2,
+        return_full_text=True,
+        stop_sequences=["test"],
+        temperature=0.5,
+        top_p=0.9,
+        top_k=10,
+        truncate=5,
+        typical_p=0.9,
+        watermark=True,
+        decoder_input_details=True,
+        seed=0,
+    )
+
+    assert response.details.generated_tokens == 6
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llava_next_load(
+    flash_llava_next, generate_load, response_snapshot
+):
+    chicken = get_chicken()
+    responses = await generate_load(
+        flash_llava_next,
+        f"User:![]({chicken})Can you tell me a very short story based on the image?",
+        max_new_tokens=10,
+        n=4,
+    )
+    generated_texts = [r.generated_text for r in responses]
+    assert generated_texts[0] == "\n\nOnce upon a time, there was a"
+    assert len(generated_texts) == 4
+    assert all([r.generated_text == generated_texts[0] for r in responses])
+
+    assert responses == response_snapshot
diff --git a/integration-tests/models/test_mamba.py b/integration-tests/models/test_mamba.py
new file mode 100644
index 00000000..bf3701b4
--- /dev/null
+++ b/integration-tests/models/test_mamba.py
@@ -0,0 +1,65 @@
+import pytest
+
+
+@pytest.fixture(scope="module")
+def fused_kernel_mamba_handle(launcher):
+    with launcher("state-spaces/mamba-130m", num_shard=1) as handle:  # single-shard launch of the mamba-130m model
+        yield handle  # the launcher context stays open until the module-scoped fixture is torn down
+
+
+@pytest.fixture(scope="module")
+async def fused_kernel_mamba(fused_kernel_mamba_handle):
+    await fused_kernel_mamba_handle.health(300)  # wait for the health check (300 is presumably a timeout in seconds — confirm)
+    return fused_kernel_mamba_handle.client  # tests talk to the server through this client
+
+
+@pytest.mark.asyncio
+async def test_mamba(fused_kernel_mamba, response_snapshot):
+    response = await fused_kernel_mamba.generate(  # single request with default sampling options
+        "What is Deep Learning?", max_new_tokens=10
+    )
+
+    assert response.details.generated_tokens == 10  # matches max_new_tokens: the full budget was generated
+    assert response.generated_text == "\n\nDeep learning is a new type of machine"
+    assert response == response_snapshot  # whole response object compared against the response_snapshot fixture
+
+
+@pytest.mark.asyncio
+async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
+    response = await fused_kernel_mamba.generate(  # same endpoint as test_mamba, with many generation options set
+        "blue, red, yellow, ",
+        max_new_tokens=10,
+        repetition_penalty=1.2,
+        return_full_text=True,  # the expected text asserted below starts with the prompt
+        stop_sequences=["test"],
+        temperature=0.5,
+        top_p=0.9,
+        top_k=10,
+        truncate=5,
+        typical_p=0.9,
+        watermark=True,
+        decoder_input_details=True,
+        seed=0,  # fixed seed; the exact-text assertion below presumably relies on it — confirm
+    )
+
+    assert response.details.generated_tokens == 10
+    assert (
+        response.generated_text
+        == "blue, red, yellow, \nand blue colors. A number of different color"
+    )
+    assert response == response_snapshot
+
+
+@pytest.mark.asyncio
+async def test_mamba_load(
+    fused_kernel_mamba, generate_load, generous_response_snapshot
+):
+    responses = await generate_load(
+        fused_kernel_mamba, "What is Deep Learning?", max_new_tokens=10, n=4
+    )
+    # Four concurrent identical requests must all come back with the same text.
+    assert len(responses) == 4
+    assert all(r.generated_text == responses[0].generated_text for r in responses)
+    assert responses[0].generated_text == "\n\nDeep learning is a new type of machine"
+
+    assert responses == generous_response_snapshot
diff --git a/integration-tests/models/test_mt0_base.py b/integration-tests/models/test_mt0_base.py
index 12f23e4c..c877056a 100644
--- a/integration-tests/models/test_mt0_base.py
+++ b/integration-tests/models/test_mt0_base.py
@@ -45,7 +45,7 @@ async def test_mt0_base_all_params(mt0_base, response_snapshot):
         seed=0,
     )
 
-    assert response.details.generated_tokens == 9
+    assert response.details.generated_tokens == 10
     assert response == response_snapshot
 
 
diff --git a/integration-tests/models/test_t5_sharded.py b/integration-tests/models/test_t5_sharded.py
index 7c288b23..4b4cfd98 100644
--- a/integration-tests/models/test_t5_sharded.py
+++ b/integration-tests/models/test_t5_sharded.py
@@ -3,7 +3,7 @@ import pytest
 
 @pytest.fixture(scope="module")
 def t5_sharded_handle(launcher):
-    with launcher("google/flan-t5-xxl", num_shard=2) as handle:
+    with launcher("google/flan-t5-xxl", num_shard=4) as handle:
         yield handle
 
 
diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py
new file mode 100644
index 00000000..d0ae331f
--- /dev/null
+++ b/integration-tests/models/test_tools_llama.py
@@ -0,0 +1,250 @@
+import pytest
+import json
+
+from text_generation.types import GrammarType
+
+
+@pytest.fixture(scope="module")
+def flash_llama_grammar_tools_handle(launcher):
+    with launcher(  # two-shard launch; disable_grammar_support=False keeps grammar support on
+        "TinyLlama/TinyLlama-1.1B-Chat-v1.0", num_shard=2, disable_grammar_support=False
+    ) as handle:
+        yield handle  # the launcher context stays open until the module-scoped fixture is torn down
+
+
+@pytest.fixture(scope="module")
+async def flash_llama_grammar_tools(flash_llama_grammar_tools_handle):
+    await flash_llama_grammar_tools_handle.health(300)  # wait for the health check (300 is presumably a timeout in seconds — confirm)
+    return flash_llama_grammar_tools_handle.client  # tests talk to the server through this client
+
+
+# Function-calling tool schemas (current weather, N-day forecast) shared by the tests below
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_current_weather",
+            "description": "Get the current weather",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA",
+                    },
+                    "format": {
+                        "type": "string",
+                        "enum": ["celsius", "fahrenheit"],
+                        "description": "The temperature unit to use. Infer this from the users location.",
+                    },
+                },
+                "required": ["location", "format"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "get_n_day_weather_forecast",
+            "description": "Get an N-day weather forecast",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city and state, e.g. San Francisco, CA",
+                    },
+                    "format": {
+                        "type": "string",
+                        "enum": ["celsius", "fahrenheit"],
+                        "description": "The temperature unit to use. Infer this from the users location.",
+                    },
+                    "num_days": {
+                        "type": "integer",
+                        "description": "The number of days to forecast",
+                    },
+                },
+                "required": ["location", "format", "num_days"],
+            },
+        },
+    },
+]
+
+
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_grammar_no_tools(
+    flash_llama_grammar_tools, response_snapshot
+):
+    response = await flash_llama_grammar_tools.chat(  # no tools argument: plain chat completion
+        max_tokens=100,
+        seed=1,  # fixed seed; the exact-text assertion below presumably relies on it — confirm
+        messages=[
+            {
+                "role": "system",
+                "content": "Youre a helpful assistant! Answer the users question best you can.",
+            },
+            {
+                "role": "user",
+                "content": "What is the weather like in Brooklyn, New York?",
+            },
+        ],
+    )
+
+    assert (  # the reply is free text, compared verbatim
+        response.choices[0].message.content
+        == "As of today, there is a Update available for the Brooklyn, New York, area. According to the latest forecast, it's warm with high temperatures throughout the day. It's forecasted at 75°F for today and 77°F for tomorrow. However, in autumn, the weather typically changes drastically, becoming cooler and wetter. You can find the current weather forecast for the area through your local weather service. Additionally"
+    )
+    assert response == response_snapshot
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_snapshot):
+    response = await flash_llama_grammar_tools.chat(
+        max_tokens=100,
+        seed=1,
+        tools=tools,
+        presence_penalty=-1.1,
+        messages=[
+            {
+                "role": "system",
+                "content": "Youre a helpful assistant! Answer the users question best you can.",
+            },
+            {
+                "role": "user",
+                "content": "What is the weather like in Brooklyn, New York?",
+            },
+        ],
+    )
+    assert response.choices[0].message.content is None  # a tool call is expected instead of text
+    assert response.choices[0].message.tool_calls == [
+        {
+            "function": {
+                "description": None,
+                "name": "tools",
+                "parameters": {
+                    "format": "celsius",
+                    "location": "New York, NY",
+                    "num_days": 14,
+                },
+            },
+            "id": 0,
+            "type": "function",
+        }
+    ]
+    assert response == response_snapshot
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_grammar_tools_auto(
+    flash_llama_grammar_tools, response_snapshot
+):
+    response = await flash_llama_grammar_tools.chat(
+        max_tokens=100,
+        seed=1,
+        tools=tools,
+        tool_choice="auto",  # the only difference from test_flash_llama_grammar_tools
+        presence_penalty=-1.1,
+        messages=[
+            {
+                "role": "system",
+                "content": "Youre a helpful assistant! Answer the users question best you can.",
+            },
+            {
+                "role": "user",
+                "content": "What is the weather like in Brooklyn, New York?",
+            },
+        ],
+    )
+    assert response.choices[0].message.content is None  # a tool call is expected instead of text
+    assert response.choices[0].message.tool_calls == [
+        {
+            "function": {
+                "description": None,
+                "name": "tools",
+                "parameters": {
+                    "format": "celsius",
+                    "location": "New York, NY",
+                    "num_days": 14,
+                },
+            },
+            "id": 0,
+            "type": "function",
+        }
+    ]
+    assert response == response_snapshot
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_grammar_tools_choice(
+    flash_llama_grammar_tools, response_snapshot
+):
+    response = await flash_llama_grammar_tools.chat(
+        max_tokens=100,
+        seed=1,
+        tools=tools,
+        tool_choice="get_current_weather",  # request one specific tool by name
+        presence_penalty=-1.1,
+        messages=[
+            {
+                "role": "system",
+                "content": "Youre a helpful assistant! Answer the users question best you can.",
+            },
+            {
+                "role": "user",
+                "content": "What is the weather like in Brooklyn, New York?",
+            },
+        ],
+    )
+    assert response.choices[0].message.content is None  # a tool call is expected instead of text
+    assert response.choices[0].message.tool_calls == [
+        {
+            "id": 0,
+            "type": "function",
+            "function": {
+                "description": None,
+                "name": "tools",
+                "parameters": {"format": "celsius", "location": "New York, NY"},
+            },
+        }
+    ]
+    assert response == response_snapshot
+
+
+@pytest.mark.skip
+@pytest.mark.asyncio
+@pytest.mark.private
+async def test_flash_llama_grammar_tools_stream(
+    flash_llama_grammar_tools, response_snapshot
+):
+    responses = await flash_llama_grammar_tools.chat(
+        max_tokens=100,
+        seed=1,
+        tools=tools,
+        tool_choice="get_current_weather",
+        presence_penalty=-1.1,
+        messages=[
+            {
+                "role": "system",
+                "content": "Youre a helpful assistant! Answer the users question best you can.",
+            },
+            {
+                "role": "user",
+                "content": "What is the weather like in Paris, France?",
+            },
+        ],
+        stream=True,  # the call result is consumed with `async for` below
+    )
+
+    count = 0  # number of streamed items received
+    async for response in responses:
+        count += 1
+
+    assert count == 20
+    assert response == response_snapshot  # `response` is the last item yielded by the stream
diff --git a/integration-tests/poetry.lock b/integration-tests/poetry.lock
index e156c161..3af99942 100644
--- a/integration-tests/poetry.lock
+++ b/integration-tests/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -122,6 +122,17 @@ files = [
 [package.dependencies]
 frozenlist = ">=1.1.0"
 
+[[package]]
+name = "annotated-types"
+version = "0.6.0"
+description = "Reusable constraint types to use with typing.Annotated"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
+    {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
+]
+
 [[package]]
 name = "async-timeout"
 version = "4.0.3"
@@ -590,55 +601,113 @@ testing = ["pytest", "pytest-benchmark"]
 
 [[package]]
 name = "pydantic"
-version = "1.10.12"
-description = "Data validation and settings management using python type hints"
+version = "2.6.4"
+description = "Data validation using Python type hints"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "pydantic-1.10.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a1fcb59f2f355ec350073af41d927bf83a63b50e640f4dbaa01053a28b7a7718"},
-    {file = "pydantic-1.10.12-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b7ccf02d7eb340b216ec33e53a3a629856afe1c6e0ef91d84a4e6f2fb2ca70fe"},
-    {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fb2aa3ab3728d950bcc885a2e9eff6c8fc40bc0b7bb434e555c215491bcf48b"},
-    {file = "pydantic-1.10.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:771735dc43cf8383959dc9b90aa281f0b6092321ca98677c5fb6125a6f56d58d"},
-    {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ca48477862372ac3770969b9d75f1bf66131d386dba79506c46d75e6b48c1e09"},
-    {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a5e7add47a5b5a40c49b3036d464e3c7802f8ae0d1e66035ea16aa5b7a3923ed"},
-    {file = "pydantic-1.10.12-cp310-cp310-win_amd64.whl", hash = "sha256:e4129b528c6baa99a429f97ce733fff478ec955513630e61b49804b6cf9b224a"},
-    {file = "pydantic-1.10.12-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b0d191db0f92dfcb1dec210ca244fdae5cbe918c6050b342d619c09d31eea0cc"},
-    {file = "pydantic-1.10.12-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:795e34e6cc065f8f498c89b894a3c6da294a936ee71e644e4bd44de048af1405"},
-    {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69328e15cfda2c392da4e713443c7dbffa1505bc9d566e71e55abe14c97ddc62"},
-    {file = "pydantic-1.10.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2031de0967c279df0d8a1c72b4ffc411ecd06bac607a212892757db7462fc494"},
-    {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:ba5b2e6fe6ca2b7e013398bc7d7b170e21cce322d266ffcd57cca313e54fb246"},
-    {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2a7bac939fa326db1ab741c9d7f44c565a1d1e80908b3797f7f81a4f86bc8d33"},
-    {file = "pydantic-1.10.12-cp311-cp311-win_amd64.whl", hash = "sha256:87afda5539d5140cb8ba9e8b8c8865cb5b1463924d38490d73d3ccfd80896b3f"},
-    {file = "pydantic-1.10.12-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:549a8e3d81df0a85226963611950b12d2d334f214436a19537b2efed61b7639a"},
-    {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:598da88dfa127b666852bef6d0d796573a8cf5009ffd62104094a4fe39599565"},
-    {file = "pydantic-1.10.12-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba5c4a8552bff16c61882db58544116d021d0b31ee7c66958d14cf386a5b5350"},
-    {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c79e6a11a07da7374f46970410b41d5e266f7f38f6a17a9c4823db80dadf4303"},
-    {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab26038b8375581dc832a63c948f261ae0aa21f1d34c1293469f135fa92972a5"},
-    {file = "pydantic-1.10.12-cp37-cp37m-win_amd64.whl", hash = "sha256:e0a16d274b588767602b7646fa05af2782576a6cf1022f4ba74cbb4db66f6ca8"},
-    {file = "pydantic-1.10.12-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a9dfa722316f4acf4460afdf5d41d5246a80e249c7ff475c43a3a1e9d75cf62"},
-    {file = "pydantic-1.10.12-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a73f489aebd0c2121ed974054cb2759af8a9f747de120acd2c3394cf84176ccb"},
-    {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b30bcb8cbfccfcf02acb8f1a261143fab622831d9c0989707e0e659f77a18e0"},
-    {file = "pydantic-1.10.12-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2fcfb5296d7877af406ba1547dfde9943b1256d8928732267e2653c26938cd9c"},
-    {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:2f9a6fab5f82ada41d56b0602606a5506aab165ca54e52bc4545028382ef1c5d"},
-    {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:dea7adcc33d5d105896401a1f37d56b47d443a2b2605ff8a969a0ed5543f7e33"},
-    {file = "pydantic-1.10.12-cp38-cp38-win_amd64.whl", hash = "sha256:1eb2085c13bce1612da8537b2d90f549c8cbb05c67e8f22854e201bde5d98a47"},
-    {file = "pydantic-1.10.12-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ef6c96b2baa2100ec91a4b428f80d8f28a3c9e53568219b6c298c1125572ebc6"},
-    {file = "pydantic-1.10.12-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6c076be61cd0177a8433c0adcb03475baf4ee91edf5a4e550161ad57fc90f523"},
-    {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d5a58feb9a39f481eda4d5ca220aa8b9d4f21a41274760b9bc66bfd72595b86"},
-    {file = "pydantic-1.10.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5f805d2d5d0a41633651a73fa4ecdd0b3d7a49de4ec3fadf062fe16501ddbf1"},
-    {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:1289c180abd4bd4555bb927c42ee42abc3aee02b0fb2d1223fb7c6e5bef87dbe"},
-    {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5d1197e462e0364906cbc19681605cb7c036f2475c899b6f296104ad42b9f5fb"},
-    {file = "pydantic-1.10.12-cp39-cp39-win_amd64.whl", hash = "sha256:fdbdd1d630195689f325c9ef1a12900524dceb503b00a987663ff4f58669b93d"},
-    {file = "pydantic-1.10.12-py3-none-any.whl", hash = "sha256:b749a43aa51e32839c9d71dc67eb1e4221bb04af1033a32e3923d46f9effa942"},
-    {file = "pydantic-1.10.12.tar.gz", hash = "sha256:0fe8a415cea8f340e7a9af9c54fc71a649b43e8ca3cc732986116b3cb135d303"},
+    {file = "pydantic-2.6.4-py3-none-any.whl", hash = "sha256:cc46fce86607580867bdc3361ad462bab9c222ef042d3da86f2fb333e1d916c5"},
+    {file = "pydantic-2.6.4.tar.gz", hash = "sha256:b1704e0847db01817624a6b86766967f552dd9dbf3afba4004409f908dcc84e6"},
 ]
 
 [package.dependencies]
-typing-extensions = ">=4.2.0"
+annotated-types = ">=0.4.0"
+pydantic-core = "2.16.3"
+typing-extensions = ">=4.6.1"
 
 [package.extras]
-dotenv = ["python-dotenv (>=0.10.4)"]
-email = ["email-validator (>=1.0.3)"]
+email = ["email-validator (>=2.0.0)"]
+
+[[package]]
+name = "pydantic-core"
+version = "2.16.3"
+description = ""
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "pydantic_core-2.16.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:75b81e678d1c1ede0785c7f46690621e4c6e63ccd9192af1f0bd9d504bbb6bf4"},
+    {file = "pydantic_core-2.16.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9c865a7ee6f93783bd5d781af5a4c43dadc37053a5b42f7d18dc019f8c9d2bd1"},
+    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:162e498303d2b1c036b957a1278fa0899d02b2842f1ff901b6395104c5554a45"},
+    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2f583bd01bbfbff4eaee0868e6fc607efdfcc2b03c1c766b06a707abbc856187"},
+    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b926dd38db1519ed3043a4de50214e0d600d404099c3392f098a7f9d75029ff8"},
+    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:716b542728d4c742353448765aa7cdaa519a7b82f9564130e2b3f6766018c9ec"},
+    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc4ad7f7ee1a13d9cb49d8198cd7d7e3aa93e425f371a68235f784e99741561f"},
+    {file = "pydantic_core-2.16.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bd87f48924f360e5d1c5f770d6155ce0e7d83f7b4e10c2f9ec001c73cf475c99"},
+    {file = "pydantic_core-2.16.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0df446663464884297c793874573549229f9eca73b59360878f382a0fc085979"},
+    {file = "pydantic_core-2.16.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4df8a199d9f6afc5ae9a65f8f95ee52cae389a8c6b20163762bde0426275b7db"},
+    {file = "pydantic_core-2.16.3-cp310-none-win32.whl", hash = "sha256:456855f57b413f077dff513a5a28ed838dbbb15082ba00f80750377eed23d132"},
+    {file = "pydantic_core-2.16.3-cp310-none-win_amd64.whl", hash = "sha256:732da3243e1b8d3eab8c6ae23ae6a58548849d2e4a4e03a1924c8ddf71a387cb"},
+    {file = "pydantic_core-2.16.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:519ae0312616026bf4cedc0fe459e982734f3ca82ee8c7246c19b650b60a5ee4"},
+    {file = "pydantic_core-2.16.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b3992a322a5617ded0a9f23fd06dbc1e4bd7cf39bc4ccf344b10f80af58beacd"},
+    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d62da299c6ecb04df729e4b5c52dc0d53f4f8430b4492b93aa8de1f541c4aac"},
+    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2acca2be4bb2f2147ada8cac612f8a98fc09f41c89f87add7256ad27332c2fda"},
+    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1b662180108c55dfbf1280d865b2d116633d436cfc0bba82323554873967b340"},
+    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e7c6ed0dc9d8e65f24f5824291550139fe6f37fac03788d4580da0d33bc00c97"},
+    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6b1bb0827f56654b4437955555dc3aeeebeddc47c2d7ed575477f082622c49e"},
+    {file = "pydantic_core-2.16.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e56f8186d6210ac7ece503193ec84104da7ceb98f68ce18c07282fcc2452e76f"},
+    {file = "pydantic_core-2.16.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:936e5db01dd49476fa8f4383c259b8b1303d5dd5fb34c97de194560698cc2c5e"},
+    {file = "pydantic_core-2.16.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:33809aebac276089b78db106ee692bdc9044710e26f24a9a2eaa35a0f9fa70ba"},
+    {file = "pydantic_core-2.16.3-cp311-none-win32.whl", hash = "sha256:ded1c35f15c9dea16ead9bffcde9bb5c7c031bff076355dc58dcb1cb436c4721"},
+    {file = "pydantic_core-2.16.3-cp311-none-win_amd64.whl", hash = "sha256:d89ca19cdd0dd5f31606a9329e309d4fcbb3df860960acec32630297d61820df"},
+    {file = "pydantic_core-2.16.3-cp311-none-win_arm64.whl", hash = "sha256:6162f8d2dc27ba21027f261e4fa26f8bcb3cf9784b7f9499466a311ac284b5b9"},
+    {file = "pydantic_core-2.16.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0f56ae86b60ea987ae8bcd6654a887238fd53d1384f9b222ac457070b7ac4cff"},
+    {file = "pydantic_core-2.16.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9bd22a2a639e26171068f8ebb5400ce2c1bc7d17959f60a3b753ae13c632975"},
+    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4204e773b4b408062960e65468d5346bdfe139247ee5f1ca2a378983e11388a2"},
+    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f651dd19363c632f4abe3480a7c87a9773be27cfe1341aef06e8759599454120"},
+    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aaf09e615a0bf98d406657e0008e4a8701b11481840be7d31755dc9f97c44053"},
+    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8e47755d8152c1ab5b55928ab422a76e2e7b22b5ed8e90a7d584268dd49e9c6b"},
+    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:500960cb3a0543a724a81ba859da816e8cf01b0e6aaeedf2c3775d12ee49cade"},
+    {file = "pydantic_core-2.16.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cf6204fe865da605285c34cf1172879d0314ff267b1c35ff59de7154f35fdc2e"},
+    {file = "pydantic_core-2.16.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d33dd21f572545649f90c38c227cc8631268ba25c460b5569abebdd0ec5974ca"},
+    {file = "pydantic_core-2.16.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:49d5d58abd4b83fb8ce763be7794d09b2f50f10aa65c0f0c1696c677edeb7cbf"},
+    {file = "pydantic_core-2.16.3-cp312-none-win32.whl", hash = "sha256:f53aace168a2a10582e570b7736cc5bef12cae9cf21775e3eafac597e8551fbe"},
+    {file = "pydantic_core-2.16.3-cp312-none-win_amd64.whl", hash = "sha256:0d32576b1de5a30d9a97f300cc6a3f4694c428d956adbc7e6e2f9cad279e45ed"},
+    {file = "pydantic_core-2.16.3-cp312-none-win_arm64.whl", hash = "sha256:ec08be75bb268473677edb83ba71e7e74b43c008e4a7b1907c6d57e940bf34b6"},
+    {file = "pydantic_core-2.16.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:b1f6f5938d63c6139860f044e2538baeee6f0b251a1816e7adb6cbce106a1f01"},
+    {file = "pydantic_core-2.16.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2a1ef6a36fdbf71538142ed604ad19b82f67b05749512e47f247a6ddd06afdc7"},
+    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:704d35ecc7e9c31d48926150afada60401c55efa3b46cd1ded5a01bdffaf1d48"},
+    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d937653a696465677ed583124b94a4b2d79f5e30b2c46115a68e482c6a591c8a"},
+    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c9803edf8e29bd825f43481f19c37f50d2b01899448273b3a7758441b512acf8"},
+    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:72282ad4892a9fb2da25defeac8c2e84352c108705c972db82ab121d15f14e6d"},
+    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f752826b5b8361193df55afcdf8ca6a57d0232653494ba473630a83ba50d8c9"},
+    {file = "pydantic_core-2.16.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4384a8f68ddb31a0b0c3deae88765f5868a1b9148939c3f4121233314ad5532c"},
+    {file = "pydantic_core-2.16.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:a4b2bf78342c40b3dc830880106f54328928ff03e357935ad26c7128bbd66ce8"},
+    {file = "pydantic_core-2.16.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:13dcc4802961b5f843a9385fc821a0b0135e8c07fc3d9949fd49627c1a5e6ae5"},
+    {file = "pydantic_core-2.16.3-cp38-none-win32.whl", hash = "sha256:e3e70c94a0c3841e6aa831edab1619ad5c511199be94d0c11ba75fe06efe107a"},
+    {file = "pydantic_core-2.16.3-cp38-none-win_amd64.whl", hash = "sha256:ecdf6bf5f578615f2e985a5e1f6572e23aa632c4bd1dc67f8f406d445ac115ed"},
+    {file = "pydantic_core-2.16.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:bda1ee3e08252b8d41fa5537413ffdddd58fa73107171a126d3b9ff001b9b820"},
+    {file = "pydantic_core-2.16.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:21b888c973e4f26b7a96491c0965a8a312e13be108022ee510248fe379a5fa23"},
+    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be0ec334369316fa73448cc8c982c01e5d2a81c95969d58b8f6e272884df0074"},
+    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b5b6079cc452a7c53dd378c6f881ac528246b3ac9aae0f8eef98498a75657805"},
+    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ee8d5f878dccb6d499ba4d30d757111847b6849ae07acdd1205fffa1fc1253c"},
+    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7233d65d9d651242a68801159763d09e9ec96e8a158dbf118dc090cd77a104c9"},
+    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c6119dc90483a5cb50a1306adb8d52c66e447da88ea44f323e0ae1a5fcb14256"},
+    {file = "pydantic_core-2.16.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:578114bc803a4c1ff9946d977c221e4376620a46cf78da267d946397dc9514a8"},
+    {file = "pydantic_core-2.16.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d8f99b147ff3fcf6b3cc60cb0c39ea443884d5559a30b1481e92495f2310ff2b"},
+    {file = "pydantic_core-2.16.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4ac6b4ce1e7283d715c4b729d8f9dab9627586dafce81d9eaa009dd7f25dd972"},
+    {file = "pydantic_core-2.16.3-cp39-none-win32.whl", hash = "sha256:e7774b570e61cb998490c5235740d475413a1f6de823169b4cf94e2fe9e9f6b2"},
+    {file = "pydantic_core-2.16.3-cp39-none-win_amd64.whl", hash = "sha256:9091632a25b8b87b9a605ec0e61f241c456e9248bfdcf7abdf344fdb169c81cf"},
+    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:36fa178aacbc277bc6b62a2c3da95226520da4f4e9e206fdf076484363895d2c"},
+    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:dcca5d2bf65c6fb591fff92da03f94cd4f315972f97c21975398bd4bd046854a"},
+    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a72fb9963cba4cd5793854fd12f4cfee731e86df140f59ff52a49b3552db241"},
+    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b60cc1a081f80a2105a59385b92d82278b15d80ebb3adb200542ae165cd7d183"},
+    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cbcc558401de90a746d02ef330c528f2e668c83350f045833543cd57ecead1ad"},
+    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:fee427241c2d9fb7192b658190f9f5fd6dfe41e02f3c1489d2ec1e6a5ab1e04a"},
+    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f4cb85f693044e0f71f394ff76c98ddc1bc0953e48c061725e540396d5c8a2e1"},
+    {file = "pydantic_core-2.16.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b29eeb887aa931c2fcef5aa515d9d176d25006794610c264ddc114c053bf96fe"},
+    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a425479ee40ff021f8216c9d07a6a3b54b31c8267c6e17aa88b70d7ebd0e5e5b"},
+    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:5c5cbc703168d1b7a838668998308018a2718c2130595e8e190220238addc96f"},
+    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99b6add4c0b39a513d323d3b93bc173dac663c27b99860dd5bf491b240d26137"},
+    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75f76ee558751746d6a38f89d60b6228fa174e5172d143886af0f85aa306fd89"},
+    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:00ee1c97b5364b84cb0bd82e9bbf645d5e2871fb8c58059d158412fee2d33d8a"},
+    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:287073c66748f624be4cef893ef9174e3eb88fe0b8a78dc22e88eca4bc357ca6"},
+    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ed25e1835c00a332cb10c683cd39da96a719ab1dfc08427d476bce41b92531fc"},
+    {file = "pydantic_core-2.16.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:86b3d0033580bd6bbe07590152007275bd7af95f98eaa5bd36f3da219dcd93da"},
+    {file = "pydantic_core-2.16.3.tar.gz", hash = "sha256:1cac689f80a3abab2d3c0048b29eea5751114054f032a941a32de4c852c59cad"},
+]
+
+[package.dependencies]
+typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
 
 [[package]]
 name = "pytest"
@@ -728,6 +797,7 @@ files = [
     {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"},
     {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"},
+    {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"},
     {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"},
     {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"},
     {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"},
@@ -800,19 +870,19 @@ pytest = ">=7.0.0,<8.0.0"
 
 [[package]]
 name = "text-generation"
-version = "0.6.0"
+version = "0.6.1"
 description = "Hugging Face Text Generation Python Client"
 optional = false
 python-versions = ">=3.7,<4.0"
 files = [
-    {file = "text-generation-0.6.0.tar.gz", hash = "sha256:48560e7a67b9a88b38335382d357f66e23b5a75f53971ccd436fc6f696a00815"},
-    {file = "text_generation-0.6.0-py3-none-any.whl", hash = "sha256:42ae7f7c9ff11f3a6c9d210f94fe708fe693eede79c6776da727456da1606ef9"},
+    {file = "text_generation-0.6.1-py3-none-any.whl", hash = "sha256:ebca00587eeabc0f5118f66ee1048bf690bd7735a9a10361c533c31c8c0bf994"},
+    {file = "text_generation-0.6.1.tar.gz", hash = "sha256:730e662aa7812f73c08ab953e008e90455f3d046f81efa0ef3de462bd4cf63d9"},
 ]
 
 [package.dependencies]
 aiohttp = ">=3.8,<4.0"
 huggingface-hub = ">=0.12,<1.0"
-pydantic = ">=1.10,<2.0"
+pydantic = ">1.10,<3"
 
 [[package]]
 name = "tomli"
@@ -979,4 +1049,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.13"
-content-hash = "bdad1d22d29138010cd6b11e1b92dc0630b35634422413a8456dc85a15bee05e"
+content-hash = "421fbce065cb1499c666599cf0fd83a5ce8fb3bed09e83c16c3a3d6953b34026"
diff --git a/integration-tests/pyproject.toml b/integration-tests/pyproject.toml
index 75411131..6923ff23 100644
--- a/integration-tests/pyproject.toml
+++ b/integration-tests/pyproject.toml
@@ -1,10 +1,11 @@
 [tool.poetry]
 name = "text-generation-integration-tests"
-version = "1.2.0"
+version = "2.0.0"
 description = "Text Generation Inference integration tests"
 authors = ["Nicolas Patry <nicolas@huggingface.co>"]
 
 [tool.poetry.dependencies]
+pydantic = "> 2, < 3"
 python = ">=3.9,<3.13"
 syrupy = "4.0.1"
 text-generation = "^0.6.0"
diff --git a/integration-tests/pytest.ini b/integration-tests/pytest.ini
index 7dcae663..bab689d7 100644
--- a/integration-tests/pytest.ini
+++ b/integration-tests/pytest.ini
@@ -2,4 +2,4 @@
 addopts = --snapshot-warn-unused
 asyncio_mode = auto
 markers =
-    private: marks tests as requiring an admin hf token (deselect with '-m "not private"')
\ No newline at end of file
+    private: marks tests as requiring an admin hf token (deselect with '-m "not private"')
diff --git a/integration-tests/requirements.txt b/integration-tests/requirements.txt
index 3f779a90..3c2ce11b 100644
--- a/integration-tests/requirements.txt
+++ b/integration-tests/requirements.txt
@@ -1,5 +1,6 @@
 aiohttp==3.8.5 ; python_version >= "3.9" and python_version < "3.13"
 aiosignal==1.3.1 ; python_version >= "3.9" and python_version < "3.13"
+annotated-types==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
 async-timeout==4.0.3 ; python_version >= "3.9" and python_version < "3.13"
 attrs==23.1.0 ; python_version >= "3.9" and python_version < "3.13"
 certifi==2023.7.22 ; python_version >= "3.9" and python_version < "3.13"
@@ -17,14 +18,15 @@ iniconfig==2.0.0 ; python_version >= "3.9" and python_version < "3.13"
 multidict==6.0.4 ; python_version >= "3.9" and python_version < "3.13"
 packaging==23.1 ; python_version >= "3.9" and python_version < "3.13"
 pluggy==1.3.0 ; python_version >= "3.9" and python_version < "3.13"
-pydantic==1.10.12 ; python_version >= "3.9" and python_version < "3.13"
+pydantic-core==2.16.3 ; python_version >= "3.9" and python_version < "3.13"
+pydantic==2.6.4 ; python_version >= "3.9" and python_version < "3.13"
 pytest-asyncio==0.21.1 ; python_version >= "3.9" and python_version < "3.13"
 pytest==7.4.0 ; python_version >= "3.9" and python_version < "3.13"
 pywin32==306 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
 syrupy==4.0.1 ; python_version >= "3.9" and python_version < "3.13"
-text-generation==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
+text-generation==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
 tomli==2.0.1 ; python_version >= "3.9" and python_version < "3.11"
 tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
 typing-extensions==4.7.1 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/launcher/Cargo.toml b/launcher/Cargo.toml
index 71bbf601..6b6fd58e 100644
--- a/launcher/Cargo.toml
+++ b/launcher/Cargo.toml
@@ -9,8 +9,10 @@ homepage.workspace = true
 [dependencies]
 clap = { version = "4.4.5", features = ["derive", "env"] }
 ctrlc = { version = "3.4.1", features = ["termination"] }
-nix = "0.28.0"
-serde = { version = "1.0.188", features = ["derive"]  }
+hf-hub = "0.3.2"
+nix = { version = "0.28.0", features = ["signal"] }
+once_cell = "1.19.0"
+serde = { version = "1.0.188", features = ["derive"] }
 serde_json = "1.0.107"
 tracing = "0.1.37"
 tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 0a023234..5b808b62 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -1,12 +1,13 @@
 /// Copyright (C) 2024 Habana Labs, Ltd. an Intel Company.
 
 use clap::{Parser, ValueEnum};
+use hf_hub::{api::sync::Api, Repo, RepoType};
 use nix::sys::signal::{self, Signal};
 use nix::unistd::Pid;
 use serde::Deserialize;
 use std::env;
 use std::ffi::OsString;
-use std::io::{BufRead, BufReader, Lines, Read};
+use std::io::{BufRead, BufReader, Lines};
 use std::os::unix::process::{CommandExt, ExitStatusExt};
 use std::path::Path;
 use std::process::{Child, Command, ExitStatus, Stdio};
@@ -21,18 +22,24 @@ use tracing_subscriber::EnvFilter;
 
 mod env_runtime;
 
+#[derive(Deserialize)]
+struct Config {
+    max_position_embeddings: Option<usize>, // preferred key read from the model's `config.json`
+    max_seq_len: Option<usize>, // fallback key, used only when `max_position_embeddings` is absent
+}
+
 #[derive(Clone, Copy, Debug, ValueEnum)]
 enum Quantization {
-    /// 4 bit quantization. Requires a specific GTPQ quantized model:
-    ///   https://hf.co/models?search=awq.
-    /// Should replace GPTQ models whereever possible because of the better latency
+    /// 4 bit quantization. Requires a specific AWQ quantized model:
+    ///   <https://hf.co/models?search=awq>.
+    /// Should replace GPTQ models wherever possible because of the better latency
     Awq,
     /// 8 bit quantization, doesn't require specific model.
     /// Should be a drop-in replacement to bitsandbytes with much better performance.
-    /// Kernels are from https://github.com/NetEase-FuXi/EETQ.git
+    /// Kernels are from <https://github.com/NetEase-FuXi/EETQ.git>
     Eetq,
-    /// 4 bit quantization. Requires a specific GTPQ quantized model: https://hf.co/models?search=gptq.
-    /// text-generation-inference will use exllama (faster) kernels whereever possible, and use
+    /// 4 bit quantization. Requires a specific GPTQ quantized model: <https://hf.co/models?search=gptq>.
+    /// text-generation-inference will use exllama (faster) kernels wherever possible, and use
     /// triton kernel (wider support) when it's not.
     /// AWQ has faster kernels.
     Gptq,
@@ -49,12 +56,19 @@ enum Quantization {
     /// Bitsandbytes 4bit. nf4 should be preferred in most cases but maybe this one has better
     /// perplexity performance for you model
     BitsandbytesFP4,
+    /// [FP8](https://developer.nvidia.com/blog/nvidia-arm-and-intel-publish-fp8-specification-for-standardization-as-an-interchange-format-for-ai/) (e4m3) works on H100 and above
+    /// This dtype has native ops and should be the fastest if available.
+    /// This is currently not the fastest because of local unpacking + padding to satisfy matrix
+    /// multiplication limitations.
+    Fp8,
 }
 
 impl std::fmt::Display for Quantization {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         // To keep in track with `server`.
         match self {
+            #[allow(deprecated)]
+            // Use `eetq` instead, which provides better latencies overall and is drop-in in most cases
             Quantization::Bitsandbytes => {
                 write!(f, "bitsandbytes")
             }
@@ -73,6 +87,9 @@ impl std::fmt::Display for Quantization {
             Quantization::Eetq => {
                 write!(f, "eetq")
             }
+            Quantization::Fp8 => {
+                write!(f, "fp8")
+            }
         }
     }
 }
@@ -157,6 +174,13 @@ struct Args {
     #[clap(long, env, value_enum)]
     quantize: Option<Quantization>,
 
+    /// The number of input_ids to speculate on
+    /// If using a medusa model, the heads will be picked up automatically
+    /// Otherwise, it will use n-gram speculation which is relatively free
+    /// in terms of compute, but the speedup heavily depends on the task.
+    #[clap(long, env)]
+    speculate: Option<usize>,
+
     /// The dtype to be forced upon the model. This option cannot be used with `--quantize`.
     #[clap(long, env, value_enum)]
     dtype: Option<Dtype>,
@@ -199,8 +223,13 @@ struct Args {
     /// for users. The larger this value, the longer prompt users can send which
     /// can impact the overall memory required to handle the load.
     /// Please note that some models have a finite range of sequence they can handle.
-    #[clap(default_value = "1024", long, env)]
-    max_input_length: usize,
+    /// Default to min(max_position_embeddings - 1, 4095)
+    #[clap(long, env)]
+    max_input_tokens: Option<usize>,
+
+    /// Legacy version of [`Args::max_input_tokens`].
+    #[clap(long, env)]
+    max_input_length: Option<usize>,
 
     /// This is the most important value to set as it defines the "memory budget"
     /// of running clients requests.
@@ -210,8 +239,9 @@ struct Args {
     /// `1511` max_new_tokens.
     /// The larger this value, the larger amount each request will be in your RAM
     /// and the less effective batching can be.
-    #[clap(default_value = "2048", long, env)]
-    max_total_tokens: usize,
+    /// Default to min(max_position_embeddings, 4096)
+    #[clap(long, env)]
+    max_total_tokens: Option<usize>,
 
     /// This represents the ratio of waiting queries vs running queries where
     /// you want to start considering pausing the running queries to include the waiting
@@ -229,8 +259,9 @@ struct Args {
     /// Limits the number of tokens for the prefill operation.
     /// Since this operation take the most memory and is compute bound, it is interesting
     /// to limit the number of requests that can be sent.
-    #[clap(default_value = "4096", long, env)]
-    max_batch_prefill_tokens: u32,
+    /// Default to `max_input_tokens + 50` to give a bit of room.
+    #[clap(long, env)]
+    max_batch_prefill_tokens: Option<u32>,
 
     /// **IMPORTANT** This is one critical control to allow maximum usage
     /// of the available hardware.
@@ -272,6 +303,17 @@ struct Args {
     #[clap(default_value = "20", long, env)]
     max_waiting_tokens: usize,
 
+    /// Enforce a maximum number of requests per batch
+    /// Specific flag for hardware targets that do not support unpadded inference
+    #[clap(long, env)]
+    max_batch_size: Option<usize>,
+
+    /// Specify the batch sizes to compute cuda graphs for.
+    /// Use "0" to disable.
+    /// Default = "1,2,4,8,16,32"
+    #[clap(long, env, value_delimiter = ',')]
+    cuda_graphs: Option<Vec<usize>>,
+
     /// The IP address to listen on
     #[clap(default_value = "0.0.0.0", long, env)]
     hostname: String,
@@ -361,6 +403,16 @@ struct Args {
     #[clap(long, env)]
     ngrok_edge: Option<String>,
 
+    /// The path to the tokenizer config file. This path is used to load the tokenizer configuration which may
+    /// include a `chat_template`. If not provided, the default config will be used from the model hub.
+    #[clap(long, env)]
+    tokenizer_config_path: Option<String>,
+
+    /// Disable outlines grammar constrained generation.
+    /// This is a feature that allows you to generate text that follows a specific grammar.
+    #[clap(long, env)]
+    disable_grammar_support: bool,
+
     /// Display a lot of information about your runtime environment
     #[clap(long, short, action)]
     env: bool,
@@ -377,6 +429,7 @@ fn shard_manager(
     model_id: String,
     revision: Option<String>,
     quantize: Option<Quantization>,
+    speculate: Option<usize>,
     dtype: Option<Dtype>,
     max_total_tokens: usize,
     trust_remote_code: bool,
@@ -390,6 +443,7 @@ fn shard_manager(
     disable_custom_kernels: bool,
     watermark_gamma: Option<f32>,
     watermark_delta: Option<f32>,
+    cuda_graphs: Vec<usize>,
     cuda_memory_fraction: f32,
     rope_scaling: Option<RopeScaling>,
     rope_factor: Option<f32>,
@@ -435,6 +489,11 @@ fn shard_manager(
         shard_args.push(quantize.to_string())
     }
 
+    if let Some(speculate) = speculate {
+        shard_args.push("--speculate".to_string());
+        shard_args.push(speculate.to_string())
+    }
+
     if let Some(dtype) = dtype {
         shard_args.push("--dtype".to_string());
         shard_args.push(dtype.to_string())
@@ -461,6 +520,9 @@ fn shard_manager(
     // Copy current process env
     let mut envs: Vec<(OsString, OsString)> = env::vars_os().collect();
 
+    // Remove LOG_LEVEL if present
+    envs.retain(|(name, _)| name != "LOG_LEVEL");
+
     // Max total tokens
     envs.push(("MAX_TOTAL_TOKENS".into(), max_total_tokens.to_string().into()));
 
@@ -471,7 +533,7 @@ fn shard_manager(
     envs.push(("WORLD_SIZE".into(), world_size.to_string().into()));
     envs.push(("MASTER_ADDR".into(), master_addr.into()));
     envs.push(("MASTER_PORT".into(), master_port.to_string().into()));
-    envs.push(("NCCL_ASYNC_ERROR_HANDLING".into(), "1".into()));
+    envs.push(("TORCH_NCCL_AVOID_RECORD_STREAMS".into(), "1".into()));
 
     // CUDA memory fraction
     envs.push((
@@ -482,6 +544,9 @@ fn shard_manager(
     // Safetensors load fast
     envs.push(("SAFETENSORS_FAST_GPU".into(), "1".into()));
 
+    // Disable progress bar
+    envs.push(("HF_HUB_DISABLE_PROGRESS_BARS".into(), "1".into()));
+
     // Enable hf transfer for insane download speeds
     let enable_hf_transfer = env::var("HF_HUB_ENABLE_HF_TRANSFER").unwrap_or("1".to_string());
     envs.push((
@@ -518,6 +583,19 @@ fn shard_manager(
         ));
     };
 
+    // Enable experimental support for cuda graphs
+    if !cuda_graphs.is_empty() {
+        envs.push((
+            "CUDA_GRAPHS".into(),
+            cuda_graphs
+                .into_iter()
+                .map(|c| c.to_string())
+                .collect::<Vec<_>>()
+                .join(",")
+                .into(),
+        ));
+    }
+
     // If disable_custom_kernels is true, pass it to the shard as an env var
     if disable_custom_kernels {
         envs.push(("DISABLE_CUSTOM_KERNELS".into(), "True".into()))
@@ -537,6 +615,7 @@ fn shard_manager(
     tracing::info!("Starting shard");
     let mut p = match Command::new("text-generation-server")
         .args(shard_args)
+        .env_clear()
         .envs(envs)
         .stdout(Stdio::piped())
         .stderr(Stdio::piped())
@@ -566,6 +645,13 @@ fn shard_manager(
     thread::spawn(move || {
         log_lines(shard_stdout_reader.lines());
     });
+    // We read stderr in another thread as it seems that lines() can block in some cases
+    let (err_sender, err_receiver) = mpsc::channel();
+    thread::spawn(move || {
+        for line in shard_stderr_reader.lines().map_while(Result::ok) {
+            err_sender.send(line).unwrap_or(());
+        }
+    });
 
     let mut ready = false;
     let start_time = Instant::now();
@@ -573,13 +659,6 @@ fn shard_manager(
     loop {
         // Process exited
         if let Some(exit_status) = p.try_wait().unwrap() {
-            // We read stderr in another thread as it seems that lines() can block in some cases
-            let (err_sender, err_receiver) = mpsc::channel();
-            thread::spawn(move || {
-                for line in shard_stderr_reader.lines().flatten() {
-                    err_sender.send(line).unwrap_or(());
-                }
-            });
             let mut err = String::new();
             while let Ok(line) = err_receiver.recv_timeout(Duration::from_millis(10)) {
                 err = err + "\n" + &line;
@@ -687,7 +766,7 @@ impl TryFrom<&String> for PythonLogMessage {
 }
 
 fn log_lines<S: Sized + BufRead>(lines: Lines<S>) {
-    for line in lines.flatten() {
+    for line in lines.map_while(Result::ok) {
         match PythonLogMessage::try_from(&line) {
             Ok(log) => log.trace(),
             Err(_) => tracing::debug!("{line}"),
@@ -747,6 +826,14 @@ enum LauncherError {
     WebserverCannotStart,
 }
 
+impl core::fmt::Display for LauncherError {
+    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
+        write!(f, "{self:?}") // Display output is exactly the `Debug` representation
+    }
+}
+
+impl std::error::Error for LauncherError {}
+
 fn download_convert_model(args: &Args, running: Arc<AtomicBool>) -> Result<(), LauncherError> {
     // Enter download tracing span
     let _span = tracing::span!(tracing::Level::INFO, "download").entered();
@@ -775,6 +862,12 @@ fn download_convert_model(args: &Args, running: Arc<AtomicBool>) -> Result<(), L
     // Copy current process env
     let mut envs: Vec<(OsString, OsString)> = env::vars_os().collect();
 
+    // Remove LOG_LEVEL if present
+    envs.retain(|(name, _)| name != "LOG_LEVEL");
+
+    // Disable progress bar
+    envs.push(("HF_HUB_DISABLE_PROGRESS_BARS".into(), "1".into()));
+
     // If huggingface_hub_cache is set, pass it to the download process
     // Useful when running inside a docker container
     if let Some(ref huggingface_hub_cache) = args.huggingface_hub_cache {
@@ -806,6 +899,7 @@ fn download_convert_model(args: &Args, running: Arc<AtomicBool>) -> Result<(), L
     tracing::info!("Starting download process.");
     let mut download_process = match Command::new("text-generation-server")
         .args(download_args)
+        .env_clear()
         .envs(envs)
         .stdout(Stdio::piped())
         .stderr(Stdio::piped())
@@ -825,12 +919,20 @@ fn download_convert_model(args: &Args, running: Arc<AtomicBool>) -> Result<(), L
         }
     };
 
-    // Redirect STDOUT to the console
-    let download_stdout = download_process.stdout.take().unwrap();
-    let stdout = BufReader::new(download_stdout);
+    let download_stdout = BufReader::new(download_process.stdout.take().unwrap());
 
     thread::spawn(move || {
-        log_lines(stdout.lines());
+        log_lines(download_stdout.lines());
+    });
+
+    let download_stderr = BufReader::new(download_process.stderr.take().unwrap());
+
+    // We read stderr in another thread as it seems that lines() can block in some cases
+    let (err_sender, err_receiver) = mpsc::channel();
+    thread::spawn(move || {
+        for line in download_stderr.lines().map_while(Result::ok) {
+            err_sender.send(line).unwrap_or(());
+        }
     });
 
     loop {
@@ -841,12 +943,10 @@ fn download_convert_model(args: &Args, running: Arc<AtomicBool>) -> Result<(), L
             }
 
             let mut err = String::new();
-            download_process
-                .stderr
-                .take()
-                .unwrap()
-                .read_to_string(&mut err)
-                .unwrap();
+            while let Ok(line) = err_receiver.recv_timeout(Duration::from_millis(10)) {
+                err = err + "\n" + &line;
+            }
+
             if let Some(signal) = status.signal() {
                 tracing::error!(
                     "Download process was signaled to shutdown with signal {signal}: {err}"
@@ -870,12 +970,14 @@ fn download_convert_model(args: &Args, running: Arc<AtomicBool>) -> Result<(), L
 fn spawn_shards(
     num_shard: usize,
     args: &Args,
+    cuda_graphs: Vec<usize>,
     shutdown: Arc<AtomicBool>,
     shutdown_receiver: &mpsc::Receiver<()>,
     shutdown_sender: mpsc::Sender<()>,
     status_receiver: &mpsc::Receiver<ShardStatus>,
     status_sender: mpsc::Sender<ShardStatus>,
     running: Arc<AtomicBool>,
+    max_total_tokens: usize,
 ) -> Result<(), LauncherError> {
     // Start shard processes
     for rank in 0..1 {
@@ -890,13 +992,14 @@ fn spawn_shards(
         let shutdown_sender = shutdown_sender.clone();
         let otlp_endpoint = args.otlp_endpoint.clone();
         let quantize = args.quantize;
+        let speculate = args.speculate;
         let dtype = args.dtype;
-        let max_total_tokens = args.max_total_tokens;
         let trust_remote_code = args.trust_remote_code;
         let master_port = args.master_port;
         let disable_custom_kernels = args.disable_custom_kernels;
         let watermark_gamma = args.watermark_gamma;
         let watermark_delta = args.watermark_delta;
+        let cuda_graphs_clone = cuda_graphs.clone();
         let cuda_memory_fraction = args.cuda_memory_fraction;
         let rope_scaling = args.rope_scaling;
         let rope_factor = args.rope_factor;
@@ -905,6 +1008,7 @@ fn spawn_shards(
                 model_id,
                 revision,
                 quantize,
+                speculate,
                 dtype,
                 max_total_tokens,
                 trust_remote_code,
@@ -918,6 +1022,7 @@ fn spawn_shards(
                 disable_custom_kernels,
                 watermark_gamma,
                 watermark_delta,
+                cuda_graphs_clone,
                 cuda_memory_fraction,
                 rope_scaling,
                 rope_factor,
@@ -958,8 +1063,24 @@ fn spawn_shards(
     Ok(())
 }
 
+/// Best-effort `{num_shard}-{gpu-name}` label from `nvidia-smi`; `None` when no GPU name is reported.
+fn compute_type(num_shard: usize) -> Option<String> {
+    let output = Command::new("nvidia-smi")
+        .args(["--query-gpu=gpu_name", "--format=csv"])
+        .output()
+        .ok()
+        .filter(|output| output.status.success())?; // a failed run's stdout is not a GPU list
+    let output = String::from_utf8(output.stdout).ok()?;
+    let fullname = output.lines().nth(1).map(str::trim).filter(|name| !name.is_empty())?;
+    Some(format!("{num_shard}-{}", fullname.replace(' ', "-").to_lowercase()))
+}
+
 fn spawn_webserver(
+    num_shard: usize,
     args: Args,
+    max_input_tokens: usize,
+    max_total_tokens: usize,
+    max_batch_prefill_tokens: u32,
     shutdown: Arc<AtomicBool>,
     shutdown_receiver: &mpsc::Receiver<()>,
 ) -> Result<Child, LauncherError> {
@@ -975,12 +1096,12 @@ fn spawn_webserver(
         args.max_stop_sequences.to_string(),
         "--max-top-n-tokens".to_string(),
         args.max_top_n_tokens.to_string(),
-        "--max-input-length".to_string(),
-        args.max_input_length.to_string(),
+        "--max-input-tokens".to_string(),
+        max_input_tokens.to_string(),
         "--max-total-tokens".to_string(),
-        args.max_total_tokens.to_string(),
+        max_total_tokens.to_string(),
         "--max-batch-prefill-tokens".to_string(),
-        args.max_batch_prefill_tokens.to_string(),
+        max_batch_prefill_tokens.to_string(),
         "--waiting-served-ratio".to_string(),
         args.waiting_served_ratio.to_string(),
         "--max-waiting-tokens".to_string(),
@@ -997,12 +1118,29 @@ fn spawn_webserver(
         args.model_id,
     ];
 
+    // Grammar support
+    if args.disable_grammar_support {
+        router_args.push("--disable-grammar-support".to_string());
+    }
+
+    // Tokenizer config path
+    if let Some(ref tokenizer_config_path) = args.tokenizer_config_path {
+        router_args.push("--tokenizer-config-path".to_string());
+        router_args.push(tokenizer_config_path.to_string());
+    }
+
     // Model optional max batch total tokens
     if let Some(max_batch_total_tokens) = args.max_batch_total_tokens {
         router_args.push("--max-batch-total-tokens".to_string());
         router_args.push(max_batch_total_tokens.to_string());
     }
 
+    // Router optional max batch size
+    if let Some(max_batch_size) = args.max_batch_size {
+        router_args.push("--max-batch-size".to_string());
+        router_args.push(max_batch_size.to_string());
+    }
+
     // Model optional revision
     if let Some(ref revision) = args.revision {
         router_args.push("--revision".to_string());
@@ -1042,6 +1180,13 @@ fn spawn_webserver(
         envs.push(("HUGGING_FACE_HUB_TOKEN".into(), api_token.into()))
     };
 
+    // Parse Compute type
+    if let Ok(compute_type) = env::var("COMPUTE_TYPE") {
+        envs.push(("COMPUTE_TYPE".into(), compute_type.into()))
+    } else if let Some(compute_type) = compute_type(num_shard) {
+        envs.push(("COMPUTE_TYPE".into(), compute_type.into()))
+    }
+
     let mut webserver = match Command::new("text-generation-router")
         .args(router_args)
         .envs(envs)
@@ -1134,19 +1279,129 @@ fn main() -> Result<(), LauncherError> {
 
     tracing::info!("{:?}", args);
 
+    let get_max_position_embeddings = || -> Result<usize, Box<dyn std::error::Error>> {
+        let model_id = args.model_id.clone();
+        let mut path = std::path::Path::new(&args.model_id).to_path_buf();
+        let filename = if !path.exists() {
+            // Assume it's a hub id
+            let api = Api::new()?;
+            let repo = if let Some(ref revision) = args.revision {
+                api.repo(Repo::with_revision(
+                    model_id,
+                    RepoType::Model,
+                    revision.to_string(),
+                ))
+            } else {
+                api.model(model_id)
+            };
+            repo.get("config.json")?
+        } else {
+            path.push("config.json");
+            path
+        };
+
+        let content = std::fs::read_to_string(filename)?;
+        let config: Config = serde_json::from_str(&content)?;
+
+        // Quantization usually means you're even more RAM constrained.
+        let max_default = 4096;
+
+        let max_position_embeddings = match (config.max_position_embeddings, config.max_seq_len) {
+            (Some(max_position_embeddings), _) | (None, Some(max_position_embeddings)) => {
+                if max_position_embeddings > max_default {
+                    let max = max_position_embeddings;
+                    tracing::info!("Model supports up to {max} but tgi will now set its default to {max_default} instead. This is to save VRAM by refusing large prompts in order to allow more users on the same hardware. You can increase that size using `--max-batch-prefill-tokens={} --max-total-tokens={max} --max-input-tokens={}`.", max + 50, max - 1);
+                    max_default
+                } else {
+                    max_position_embeddings
+                }
+            }
+            _ => {
+                return Err(Box::new(LauncherError::ArgumentValidation(
+                    "no max defined".to_string(),
+                )));
+            }
+        };
+        Ok(max_position_embeddings)
+    };
+    let max_position_embeddings: usize = get_max_position_embeddings().unwrap_or(4096);
+
+    let max_input_tokens = {
+        match (args.max_input_tokens, args.max_input_length) {
+            (Some(max_input_tokens), Some(max_input_length)) => {
+                return Err(LauncherError::ArgumentValidation(
+                    format!("Both `max_input_tokens` ({max_input_tokens}) and `max_input_length` ({max_input_length}) are set. Please define only `max_input_tokens` as `max_input_length` is deprecated for naming consistency.",
+                )));
+            }
+            (Some(max_input_tokens), None) | (None, Some(max_input_tokens)) => max_input_tokens,
+            (None, None) => {
+                let value = max_position_embeddings.saturating_sub(1); // a 0 from config.json must not underflow
+                tracing::info!("Default `max_input_tokens` to {value}");
+                value
+            }
+        }
+    };
+    let max_total_tokens = {
+        match args.max_total_tokens {
+            Some(max_total_tokens) => max_total_tokens,
+            None => {
+                let value = max_position_embeddings;
+                tracing::info!("Default `max_total_tokens` to {value}");
+                value
+            }
+        }
+    };
+    let max_batch_prefill_tokens = {
+        match args.max_batch_prefill_tokens {
+            Some(max_batch_prefill_tokens) => max_batch_prefill_tokens,
+            None => {
+                let value: u32 = if let Some(max_batch_size) = args.max_batch_size {
+                    max_batch_size * max_input_tokens
+                } else {
+                    // Adding some margin in order to account for potential block_size alignment
+                    // issues.
+                    max_input_tokens + 50
+                } as u32;
+                tracing::info!("Default `max_batch_prefill_tokens` to {value}");
+                value
+            }
+        }
+    };
+
     // Validate args
-    if args.max_input_length >= args.max_total_tokens {
+    if max_input_tokens >= max_total_tokens {
         return Err(LauncherError::ArgumentValidation(
-            "`max_input_length` must be < `max_total_tokens`".to_string(),
+            "`max_input_tokens` must be < `max_total_tokens`".to_string(),
         ));
     }
-    if args.max_input_length as u32 > args.max_batch_prefill_tokens {
+    if max_input_tokens as u32 > max_batch_prefill_tokens {
         return Err(LauncherError::ArgumentValidation(format!(
-            "`max_batch_prefill_tokens` must be >= `max_input_length`. Given: {} and {}",
-            args.max_batch_prefill_tokens, args.max_input_length
+            "`max_batch_prefill_tokens` must be >= `max_input_tokens`. Given: {} and {}",
+            max_batch_prefill_tokens, max_input_tokens
         )));
     }
 
+    let cuda_graphs = match (&args.cuda_graphs, &args.quantize) {
+        (Some(cuda_graphs), _) => cuda_graphs.clone(), // an explicit `--cuda-graphs` wins, quantized or not
+        #[allow(deprecated)]
+        (
+            None,
+            Some(
+                Quantization::Bitsandbytes
+                | Quantization::BitsandbytesNF4
+                | Quantization::BitsandbytesFP4,
+            ),
+        ) => {
+            tracing::info!("Bitsandbytes doesn't work with cuda graphs, deactivating them");
+            vec![]
+        }
+        _ => {
+            let cuda_graphs = vec![1, 2, 4, 8, 16, 32];
+            tracing::info!("Using default cuda graphs {cuda_graphs:?}");
+            cuda_graphs
+        }
+    };
+
     if args.validation_workers == 0 {
         return Err(LauncherError::ArgumentValidation(
             "`validation_workers` must be > 0".to_string(),
@@ -1165,16 +1420,16 @@ fn main() -> Result<(), LauncherError> {
     }
 
     if let Some(ref max_batch_total_tokens) = args.max_batch_total_tokens {
-        if args.max_batch_prefill_tokens > *max_batch_total_tokens {
+        if max_batch_prefill_tokens > *max_batch_total_tokens {
             return Err(LauncherError::ArgumentValidation(format!(
                 "`max_batch_prefill_tokens` must be <= `max_batch_total_tokens`. Given: {} and {}",
-                args.max_batch_prefill_tokens, max_batch_total_tokens
+                max_batch_prefill_tokens, max_batch_total_tokens
             )));
         }
-        if args.max_total_tokens as u32 > *max_batch_total_tokens {
+        if max_total_tokens as u32 > *max_batch_total_tokens {
             return Err(LauncherError::ArgumentValidation(format!(
                 "`max_total_tokens` must be <= `max_batch_total_tokens`. Given: {} and {}",
-                args.max_total_tokens, max_batch_total_tokens
+                max_total_tokens, max_batch_total_tokens
             )));
         }
     }
@@ -1221,12 +1476,14 @@ fn main() -> Result<(), LauncherError> {
     spawn_shards(
         num_shard,
         &args,
+        cuda_graphs,
         shutdown.clone(),
         &shutdown_receiver,
         shutdown_sender,
         &status_receiver,
         status_sender,
         running.clone(),
+        max_total_tokens,
     )?;
 
     // We might have received a termination signal
@@ -1235,11 +1492,19 @@ fn main() -> Result<(), LauncherError> {
         return Ok(());
     }
 
-    let mut webserver =
-        spawn_webserver(args, shutdown.clone(), &shutdown_receiver).map_err(|err| {
-            shutdown_shards(shutdown.clone(), &shutdown_receiver);
-            err
-        })?;
+    let mut webserver = spawn_webserver(
+        num_shard,
+        args,
+        max_input_tokens,
+        max_total_tokens,
+        max_batch_prefill_tokens,
+        shutdown.clone(),
+        &shutdown_receiver,
+    )
+    .map_err(|err| {
+        shutdown_shards(shutdown.clone(), &shutdown_receiver);
+        err
+    })?;
 
     // Default exit code
     let mut exit_code = Ok(());
diff --git a/load_tests/common.js b/load_tests/common.js
index be812e9b..06d2506f 100644
--- a/load_tests/common.js
+++ b/load_tests/common.js
@@ -7,7 +7,9 @@ const seed = 0;
 
 const host = __ENV.HOST || '127.0.0.1:8000';
 const timePerToken = new Trend('time_per_token', true);
-const throughput = new Counter('tokens_per_s');
+const tokens = new Counter('tokens');
+const new_tokens = new Counter('new_tokens');
+const input_tokens = new Counter('input_tokens');
 
 randomSeed(seed);
 // const shareGPT = JSON.parse(open("ShareGPT_V3_unfiltered_cleaned_split.json"))
@@ -19,7 +21,7 @@ export function get_options(reference_latency_ms){
         thresholds: {
             http_req_failed: ['rate==0'],
             time_per_token: [{
-                threshold: `p(50)<${3 * reference_latency_ms}`,
+                threshold: `p(50)<${5 * reference_latency_ms}`,
                 abortOnFail: true,
                 delayAbortEval: '10s'
             }],
@@ -28,7 +30,7 @@ export function get_options(reference_latency_ms){
             load_test: {
                 executor: 'constant-arrival-rate',
                 duration: '60s',
-                preAllocatedVUs: 100,
+                preAllocatedVUs: 10,
                 rate: 10,
                 timeUnit: '1s',
             },
@@ -48,17 +50,22 @@ export function run(host, generate_payload, max_new_tokens) {
         return;
     }
 
+
     check(res, {
         'Post status is 200': (r) => res.status === 200,
     });
-    const n_tokens = max_new_tokens;
-    const timings = res.timings.duration;
+    const duration = res.timings.duration;
 
     if (res.status === 200) {
-        const latency_ms_per_token = timings / n_tokens;
+        const body = res.json();
+        const n_tokens = body.details.tokens.length;
+        const latency_ms_per_token = duration / n_tokens;
         timePerToken.add(latency_ms_per_token);
         const latency_in_s = latency_ms_per_token / 1000;
         const individual_throughput = 1 / latency_in_s;
-        throughput.add(individual_throughput);
+        const _input_tokens = body.details.prefill.length;
+        tokens.add(n_tokens + _input_tokens);
+        input_tokens.add(_input_tokens);
+        new_tokens.add(n_tokens);
     }
 }
diff --git a/load_tests/starcoder_load.js b/load_tests/starcoder_load.js
index 76316b65..2f6cb3d6 100644
--- a/load_tests/starcoder_load.js
+++ b/load_tests/starcoder_load.js
@@ -60,4 +60,4 @@ export default function () {
         inferenceTime.add(res.headers["X-Inference-Time"]);
         timePerToken.add(res.headers["X-Time-Per-Token"]);
     }
-}
\ No newline at end of file
+}
diff --git a/load_tests/tgi.js b/load_tests/tgi.js
index 93a0e278..6c559a9f 100644
--- a/load_tests/tgi.js
+++ b/load_tests/tgi.js
@@ -1,13 +1,13 @@
 import { get_options, run } from "./common.js";
- 
-const reference_latency_ms = 30;
+
+const reference_latency_ms = 70;
 const host = __ENV.HOST || '127.0.0.1:8000';
 const max_new_tokens = 50;
 
 
 function generate_payload(gpt){
     const input = gpt["conversations"][0]["value"];
-    return {"inputs": input, "parameters": {"max_new_tokens": max_new_tokens, "temperature" : 0.5}}
+    return {"inputs": input, "parameters": {"max_new_tokens": max_new_tokens, "decoder_input_details": true}}
 }
 
 export const options = get_options(reference_latency_ms);
diff --git a/load_tests/vllm.js b/load_tests/vllm.js
index fcb38262..1edc039a 100644
--- a/load_tests/vllm.js
+++ b/load_tests/vllm.js
@@ -1,5 +1,5 @@
 import { get_options, run } from "./common.js";
- 
+
 const reference_latency_ms = 22;
 const host = __ENV.HOST || '127.0.0.1:8000';
 const max_new_tokens = 50;
diff --git a/proto/generate.proto b/proto/generate.proto
index c7f9f3c1..9921faea 100644
--- a/proto/generate.proto
+++ b/proto/generate.proto
@@ -1,6 +1,6 @@
 syntax = "proto3";
 
-package generate.v1;
+package generate.v2;
 
 service TextGenerationService {
     /// Model Info
@@ -32,6 +32,7 @@ message InfoResponse {
     string dtype = 2;
     string device_type = 3;
     optional uint32 window_size = 4;
+    uint32 speculate = 5;
 }
 
 /// Empty request
@@ -50,6 +51,12 @@ message ClearCacheRequest {
 /// Empty response
 message ClearCacheResponse {}
 
+enum GrammarType {
+    GRAMMAR_TYPE_NONE = 0;
+    GRAMMAR_TYPE_JSON = 1;
+    GRAMMAR_TYPE_REGEX = 2;
+}
+
 message NextTokenChooserParameters {
     /// exponential scaling output probability distribution
     float temperature = 1;
@@ -65,8 +72,14 @@ message NextTokenChooserParameters {
     uint64 seed = 6;
     /// repetition penalty
     float repetition_penalty = 7;
+    /// frequency penalty
+    float frequency_penalty = 9;
     /// token watermarking using "A Watermark for Large Language Models"
     bool watermark = 8;
+    /// grammar (applied if not empty)
+    string grammar = 10;
+    /// grammar type
+    GrammarType grammar_type = 11;
 }
 
 message StoppingCriteriaParameters {
@@ -135,43 +148,27 @@ message GeneratedText {
     optional uint64 seed = 4;
 }
 
-message PrefillTokens {
-    /// Prefill Token IDs
+message Tokens {
+    /// Token IDs
     repeated uint32 ids = 1;
-    /// Prefill Logprobs
+    /// Logprobs
     repeated float logprobs = 2;
-    /// Prefill tokens
+    /// tokens
     repeated string texts = 3;
-}
-
-message TopTokens {
-    /// Top Token IDs
-    repeated uint32 ids = 1;
-    /// Top Logprobs
-    repeated float logprobs = 2;
-    /// Top Token Texts
-    repeated string texts = 3;
-    /// If the tokens are special
-    repeated bool is_special = 6;
+    /// special
+    repeated bool is_special = 4;
 }
 
 message Generation {
     /// Request ID
     uint64 request_id = 1;
     /// Prefill tokens (optional)
-    PrefillTokens prefill_tokens = 2;
-    /// Token ID
-    uint32 token_id = 3;
-    /// Logprob
-    float token_logprob = 4;
-    /// Text
-    string token_text = 5;
-    /// Is it a special token
-    bool token_is_special = 6;
+    Tokens prefill_tokens = 2;
+    Tokens tokens = 3;
     /// Complete generated text
-    optional GeneratedText generated_text = 7;
+    optional GeneratedText generated_text = 4;
     /// Top tokens
-    TopTokens top_tokens = 8;
+    repeated Tokens top_tokens = 5;
 }
 
 message FilterBatchRequest {
@@ -197,6 +194,12 @@ message PrefillResponse {
     repeated Generation generations = 1;
     /// Next batch (cached)
     optional CachedBatch batch = 2;
+    /// Forward elapsed time in nanoseconds
+    uint64 forward_ns = 3;
+    /// Decode elapsed time in nanoseconds
+    uint64 decode_ns = 4;
+    /// Total elapsed time in nanoseconds
+    uint64 total_ns = 5;
 }
 
 message DecodeRequest {
@@ -209,14 +212,24 @@ message DecodeResponse {
     repeated Generation generations = 1;
     /// Next batch (cached)
     optional CachedBatch batch = 2;
+    /// Forward elapsed time in nanoseconds
+    uint64 forward_ns = 3;
+    /// Decode elapsed time in nanoseconds
+    uint64 decode_ns = 4;
+    /// Total elapsed time in nanoseconds
+    uint64 total_ns = 5;
+    /// Concatenate elapsed time in nanoseconds
+    optional uint64 concat_ns = 6;
 }
 
 message WarmupRequest {
     /// Batch to warmup on
     repeated Batch batches = 1;
+    uint32 max_input_length = 2;
+    uint32 max_prefill_tokens = 3;
+    uint32 max_total_tokens = 4;
 }
 
-/// Empty response
 message WarmupResponse {
     /// Maximum number of tokens supported by the model
     optional uint32 max_supported_total_tokens = 1;
diff --git a/router/Cargo.toml b/router/Cargo.toml
index 55af635a..582bbdfb 100644
--- a/router/Cargo.toml
+++ b/router/Cargo.toml
@@ -21,6 +21,8 @@ axum-tracing-opentelemetry = "0.14.1"
 text-generation-client = { path = "client" }
 clap = { version = "4.4.5", features = ["derive", "env"] }
 futures = "0.3.28"
+hf-hub = { version = "0.3.0", features = ["tokio"] }
+jsonschema = { version = "0.17.1", features = ["draft202012"] }
 metrics = "0.21.1"
 metrics-exporter-prometheus = { version = "0.12.1", features = [] }
 nohash-hasher = "0.2.0"
@@ -31,7 +33,7 @@ reqwest = { version = "0.11.20", features = [] }
 serde = "1.0.188"
 serde_json = "1.0.107"
 thiserror = "1.0.48"
-tokenizers = { version = "0.14.0", features = ["http"] }
+tokenizers = { version = "0.15.1", features = ["http"] }
 tokio = { version = "1.32.0", features = ["rt", "rt-multi-thread", "parking_lot", "signal", "sync"] }
 tokio-stream = "0.1.14"
 tower-http = { version = "0.4.4", features = ["cors"] }
@@ -41,8 +43,13 @@ tracing-subscriber = { version = "0.3.17", features = ["json", "env-filter"] }
 utoipa = { version = "3.5.0", features = ["axum_extras"] }
 utoipa-swagger-ui = { version = "3.1.5", features = ["axum"] }
 ngrok = { version = "0.13.1", features = ["axum"], optional = true }
-hf-hub = "0.3.1"
 init-tracing-opentelemetry = { version = "0.14.1", features = ["opentelemetry-otlp"] }
+minijinja = { git = "https://github.com/mitsuhiko/minijinja.git", rev = "5cd4efb" }
+futures-util = "0.3.30"
+regex = "1.10.3"
+once_cell = "1.19.0"
+image = "0.25.1"
+base64 = "0.22.0"
 
 [build-dependencies]
 vergen = { version = "8.2.5", features = ["build", "git", "gitcl"] }
@@ -50,3 +57,4 @@ vergen = { version = "8.2.5", features = ["build", "git", "gitcl"] }
 [features]
 default = ["ngrok"]
 ngrok = ["dep:ngrok"]
+google = []
diff --git a/router/README.md b/router/README.md
index c18d4f9e..5b1f9e36 100644
--- a/router/README.md
+++ b/router/README.md
@@ -28,7 +28,7 @@ this is controlled by the client, and therefore the amount of batching is decide
 beforehand.
 
 For text-generation, and LLMs which are memory bound we can try to be much more
-efficient with the available compute, by having client sending us single queries, 
+efficient with the available compute, by having client sending us single queries,
 and let the router mix&match queries into or out of batches to make the use the
 compute the most efficiently. This is possible because for LLMs the total compute
 for running the model is much bigger than doing mix&match of the batches themselves.
@@ -89,5 +89,5 @@ most critical perceived quality of an LLM API.
 With token streaming, the server can start answering after the first `prefill` pass
 directly, without waiting for all the generation to be done. For extremely long queries
 this means clients can start to see something happening orders of magnitude before
-the work is done. Seeing something in progress allows them to cut short if it's not 
+the work is done. Seeing something in progress allows them to cut short if it's not
 what's wanted but also it "feels" better.
diff --git a/router/client/src/client.rs b/router/client/src/client.rs
index 486e13d9..51b75a49 100644
--- a/router/client/src/client.rs
+++ b/router/client/src/client.rs
@@ -1,13 +1,14 @@
 /// Copyright (C) 2024 Habana Labs, Ltd. an Intel Company.
 
 /// Single shard Client
-use crate::pb::generate::v1::text_generation_service_client::TextGenerationServiceClient;
-use crate::pb::generate::v1::*;
+use crate::pb::generate::v2::text_generation_service_client::TextGenerationServiceClient;
+use crate::pb::generate::v2::*;
 use crate::Result;
 use std::env;
 use rand::{distributions::Uniform, Rng};
 use grpc_metadata::InjectTelemetryContext;
 use std::cmp;
+use std::time::Duration;
 use tonic::transport::{Channel, Uri};
 use tracing::instrument;
 
@@ -108,7 +109,7 @@ impl Client {
         max_input_length: u32,
         max_prefill_tokens: u32,
         max_total_tokens: u32,
-        max_batch_total_tokens: Option<u32>,
+        max_batch_size: Option<usize>,
     ) -> Result<Option<u32>> {
         let warmup_enabled: bool = env::var("WARMUP_ENABLED").ok().map_or(true, |value| value.to_lowercase() == "true");
         if !warmup_enabled {
@@ -141,17 +142,9 @@ impl Client {
             }
         }
 
+        // if max_batch_size is None, create two batches
+        let num_batches = max_batch_size.unwrap_or(2).min(2);
         let mut id_counter: u64 = 0;
-        let num_batches = match max_batch_total_tokens {
-            Some(val) => {
-                if val == max_total_tokens {
-                    1
-                } else {
-                    2
-                }
-            }
-            None => 2, // If max_batch_total_tokens is None, create two batches
-        };
         for shape in shapes.iter() {
             // create two batches in order to trigger concatenate operation
             // in case decode bs=1 create one batch
@@ -166,7 +159,12 @@ impl Client {
                 );
                 num_batches
             ];
-            let request = tonic::Request::new(WarmupRequest { batches }).inject_context();
+            let request = tonic::Request::new(WarmupRequest {
+                batches,
+                max_input_length,
+                max_prefill_tokens,
+                max_total_tokens,
+            }).inject_context();
             let _response = self.stub.warmup(request).await?.into_inner();
         }
 
@@ -187,7 +185,12 @@ impl Client {
                 );
                 num_batches
             ];
-            let request = tonic::Request::new(WarmupRequest { batches }).inject_context();
+            let request = tonic::Request::new(WarmupRequest {
+                batches,
+                max_input_length,
+                max_prefill_tokens,
+                max_total_tokens,
+            }).inject_context();
             let _response = self.stub.warmup(request).await?.into_inner();
         }
         Ok(None) // No support for maximum total tokens
@@ -216,7 +219,10 @@ impl Client {
                     do_sample: false,
                     seed: 0,
                     repetition_penalty: 1.0,
+                    frequency_penalty: 0.0,
                     watermark: false,
+                    grammar: String::new(),
+                    grammar_type: GrammarType::None as i32,
                 })
             } else {
                 Some(NextTokenChooserParameters {
@@ -227,7 +233,10 @@ impl Client {
                     do_sample: true,
                     seed: 0,
                     repetition_penalty: 1.2,
+                    frequency_penalty: 0.1,
                     watermark: false,
+                    grammar: String::new(),
+                    grammar_type: GrammarType::None as i32,
                 })
             };
             requests.push(Request {
@@ -294,10 +303,14 @@ impl Client {
     pub async fn prefill(
         &mut self,
         batch: Batch,
-    ) -> Result<(Vec<Generation>, Option<CachedBatch>)> {
+    ) -> Result<(Vec<Generation>, Option<CachedBatch>, PrefillTimings)> {
         let request = tonic::Request::new(PrefillRequest { batch: Some(batch) }).inject_context();
         let response = self.stub.prefill(request).await?.into_inner();
-        Ok((response.generations, response.batch))
+        Ok((
+            response.generations,
+            response.batch,
+            PrefillTimings::new(response.forward_ns, response.decode_ns, response.total_ns),
+        ))
     }
 
     /// Generate one token for each request in the given cached batches
@@ -308,9 +321,52 @@ impl Client {
     pub async fn decode(
         &mut self,
         batches: Vec<CachedBatch>,
-    ) -> Result<(Vec<Generation>, Option<CachedBatch>)> {
+    ) -> Result<(Vec<Generation>, Option<CachedBatch>, DecodeTimings)> {
         let request = tonic::Request::new(DecodeRequest { batches }).inject_context();
         let response = self.stub.decode(request).await?.into_inner();
-        Ok((response.generations, response.batch))
+        Ok((
+            response.generations,
+            response.batch,
+            DecodeTimings::new(
+                response.concat_ns,
+                response.forward_ns,
+                response.decode_ns,
+                response.total_ns,
+            ),
+        ))
+    }
+}
+
+pub struct PrefillTimings { // timings taken from a `PrefillResponse`, converted to `Duration`
+    pub forward: Duration, // forward elapsed time
+    pub decode: Duration, // decode elapsed time
+    pub total: Duration, // total elapsed time
+}
+
+impl PrefillTimings {
+    fn new(forward_ns: u64, decode_ns: u64, total_ns: u64) -> Self { // all inputs are nanoseconds
+        Self {
+            forward: Duration::from_nanos(forward_ns),
+            decode: Duration::from_nanos(decode_ns),
+            total: Duration::from_nanos(total_ns),
+        }
+    }
+}
+
+pub struct DecodeTimings { // timings taken from a `DecodeResponse`, converted to `Duration`
+    pub concat: Option<Duration>, // concatenate elapsed time; `None` when `concat_ns` is absent
+    pub forward: Duration, // forward elapsed time
+    pub decode: Duration, // decode elapsed time
+    pub total: Duration, // total elapsed time
+}
+
+impl DecodeTimings {
+    fn new(concat_ns: Option<u64>, forward_ns: u64, decode_ns: u64, total_ns: u64) -> Self { // all inputs are nanoseconds
+        Self {
+            concat: concat_ns.map(Duration::from_nanos),
+            forward: Duration::from_nanos(forward_ns),
+            decode: Duration::from_nanos(decode_ns),
+            total: Duration::from_nanos(total_ns),
+        }
     }
 }
diff --git a/router/client/src/lib.rs b/router/client/src/lib.rs
index f334be21..6782d9ff 100644
--- a/router/client/src/lib.rs
+++ b/router/client/src/lib.rs
@@ -6,11 +6,11 @@ mod pb;
 mod sharded_client;
 
 pub use client::Client;
-pub use pb::generate::v1::HealthResponse;
-pub use pb::generate::v1::InfoResponse as ShardInfo;
-pub use pb::generate::v1::{
-    Batch, CachedBatch, FinishReason, GeneratedText, Generation, NextTokenChooserParameters,
-    PrefillTokens, Request, StoppingCriteriaParameters,
+pub use pb::generate::v2::HealthResponse;
+pub use pb::generate::v2::InfoResponse as ShardInfo;
+pub use pb::generate::v2::{
+    Batch, CachedBatch, FinishReason, GeneratedText, Generation, GrammarType,
+    NextTokenChooserParameters, Request, StoppingCriteriaParameters, Tokens,
 };
 pub use sharded_client::ShardedClient;
 use thiserror::Error;
diff --git a/router/client/src/pb/.gitignore b/router/client/src/pb/.gitignore
index b46a4c42..6f5f3d11 100644
--- a/router/client/src/pb/.gitignore
+++ b/router/client/src/pb/.gitignore
@@ -1 +1 @@
-*.rs
\ No newline at end of file
+*.rs
diff --git a/router/client/src/sharded_client.rs b/router/client/src/sharded_client.rs
index 062ec102..e2c800dd 100644
--- a/router/client/src/sharded_client.rs
+++ b/router/client/src/sharded_client.rs
@@ -1,5 +1,6 @@
 /// Copyright (C) 2024 Habana Labs, Ltd. an Intel Company.
 
+use crate::client::{DecodeTimings, PrefillTimings};
 /// Multi shard Client
 use crate::{Batch, CachedBatch, Client, Generation, HealthResponse, ShardInfo};
 use crate::{ClientError, Result};
@@ -98,13 +99,18 @@ impl ShardedClient {
         max_input_length: u32,
         max_prefill_tokens: u32,
         max_total_tokens: u32,
-        max_batch_total_tokens: Option<u32>,
+        max_batch_size: Option<usize>,
     ) -> Result<Option<u32>> {
         let futures: Vec<_> = self
             .clients
             .iter_mut()
             .map(|client| {
-                Box::pin(client.warmup(max_input_length, max_prefill_tokens, max_total_tokens, max_batch_total_tokens))
+                Box::pin(client.warmup(
+                    max_input_length,
+                    max_prefill_tokens,
+                    max_total_tokens,
+                    max_batch_size,
+                ))
             })
             .collect();
         // Take the minimum value
@@ -119,49 +125,65 @@ impl ShardedClient {
     ///
     /// Returns Generation for each request in batch
     /// and the next cached batch
-    #[instrument(skip_all, fields(id = &batch.id, size = &batch.size))]
+    #[instrument(skip_all, fields(id = & batch.id, size = & batch.size))]
     pub async fn prefill(
         &mut self,
         batch: Batch,
-    ) -> Result<(Vec<Generation>, Option<CachedBatch>)> {
+    ) -> Result<(Vec<Generation>, Option<CachedBatch>, PrefillTimings)> {
         let futures: Vec<_> = self
             .clients
             .iter_mut()
             .map(|client| Box::pin(client.prefill(batch.clone())))
             .collect();
-        let results: Result<Vec<(Vec<Generation>, Option<CachedBatch>)>> =
+        #[allow(clippy::type_complexity)]
+        let results: Result<Vec<(Vec<Generation>, Option<CachedBatch>, PrefillTimings)>> =
             join_all(futures).await.into_iter().collect();
-        merge_generations(results?)
+        let mut results = results?;
+
+        let (mut generations, next_batch, mut timings) =
+            results.pop().ok_or(ClientError::EmptyResults)?;
+
+        // Merge generations from different model shards
+        for (mut shard_generations, _, shard_timings) in results.into_iter() {
+            generations.append(&mut shard_generations);
+            // Return the timings of the slowest shard
+            if shard_timings.total > timings.total {
+                timings = shard_timings;
+            }
+        }
+        Ok((generations, next_batch, timings))
     }
 
     /// Generate one token for each request in the given cached batches
     ///
     /// Returns Generation for each request in batches
     /// and the next cached batch
-    #[instrument(skip_all, fields(size = batches.iter().map(|batch|{batch.size}).sum::<u32>()))]
+    #[instrument(skip_all, fields(size = batches.iter().map(| batch | {batch.size}).sum::< u32 > ()))]
     pub async fn decode(
         &mut self,
         batches: Vec<CachedBatch>,
-    ) -> Result<(Vec<Generation>, Option<CachedBatch>)> {
+    ) -> Result<(Vec<Generation>, Option<CachedBatch>, DecodeTimings)> {
         let futures: Vec<_> = self
             .clients
             .iter_mut()
             .map(|client| Box::pin(client.decode(batches.clone())))
             .collect();
-        let results: Result<Vec<(Vec<Generation>, Option<CachedBatch>)>> =
+        #[allow(clippy::type_complexity)]
+        let results: Result<Vec<(Vec<Generation>, Option<CachedBatch>, DecodeTimings)>> =
             join_all(futures).await.into_iter().collect();
-        merge_generations(results?)
+        let mut results = results?;
+
+        let (mut generations, next_batch, mut timings) =
+            results.pop().ok_or(ClientError::EmptyResults)?;
+
+        // Merge generations from different model shards
+        for (mut shard_generations, _, shard_timings) in results.into_iter() {
+            generations.append(&mut shard_generations);
+            // Return the timings of the slowest shard
+            if shard_timings.total > timings.total {
+                timings = shard_timings;
+            }
+        }
+        Ok((generations, next_batch, timings))
     }
 }
-
-/// Merge generations from the different model shards
-fn merge_generations(
-    mut results: Vec<(Vec<Generation>, Option<CachedBatch>)>,
-) -> Result<(Vec<Generation>, Option<CachedBatch>)> {
-    let (mut generations, next_batch) = results.pop().ok_or(ClientError::EmptyResults)?;
-
-    for (mut shard_generations, _) in results.into_iter() {
-        generations.append(&mut shard_generations);
-    }
-    Ok((generations, next_batch))
-}
diff --git a/router/src/config.rs b/router/src/config.rs
new file mode 100644
index 00000000..9b5a2404
--- /dev/null
+++ b/router/src/config.rs
@@ -0,0 +1,158 @@
+use serde::{Deserialize, Serialize};
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+#[serde(tag = "model_type")]
+#[serde(rename_all = "snake_case")]
+pub struct LlavaNext {
+    text_config: TextConfig,
+    vision_config: VisionConfig,
+    image_grid_pinpoints: Vec<(usize, usize)>,
+}
+
+fn get_anyres_image_grid_shape(
+    height: usize,
+    width: usize,
+    grid_pinpoints: &[(usize, usize)],
+    patch_size: usize,
+) -> (usize, usize) {
+    let (height, width) = select_best_resolution(height, width, grid_pinpoints);
+    (height / patch_size, width / patch_size)
+}
+
+/// Selects the `(height, width)` entry of `possible_resolutions` with the largest effective
+/// resolution (scaled area, capped at the original area); ties go to the entry with the least
+/// wasted resolution. Returns `(original_height, original_width)` if the list is empty.
+fn select_best_resolution(
+    original_height: usize,
+    original_width: usize,
+    possible_resolutions: &[(usize, usize)],
+) -> (usize, usize) {
+    let mut best_fit = None;
+    let mut max_effective_resolution = 0;
+    let mut min_wasted_resolution = f32::INFINITY; // running minimum, so it must start at +inf
+
+    for (height, width) in possible_resolutions {
+        let wscale = *width as f32 / original_width as f32;
+        let hscale = *height as f32 / original_height as f32;
+        // `f32` is only `PartialOrd`, so the minimum is taken by hand rather than via `cmp::min`.
+        let scale = if wscale > hscale { hscale } else { wscale };
+        let downscaled_width = (*width as f32 * scale) as usize;
+        let downscaled_height = (*height as f32 * scale) as usize;
+        let effective_resolution = std::cmp::min(
+            downscaled_width * downscaled_height,
+            original_width * original_height,
+        );
+        let wasted_resolution = (width * height) - effective_resolution;
+
+        if effective_resolution > max_effective_resolution
+            || (effective_resolution == max_effective_resolution
+                && (wasted_resolution as f32) < min_wasted_resolution)
+        {
+            max_effective_resolution = effective_resolution;
+            min_wasted_resolution = wasted_resolution as f32;
+            best_fit = Some((*height, *width));
+        }
+    }
+
+    best_fit.unwrap_or((original_height, original_width))
+}
+
+impl LlavaNext {
+    pub fn get_number_of_features(&self, height: usize, width: usize) -> usize { // unpadded + newline + base features
+        let image_size = self.vision_config.image_size;
+        let patch_size = self.vision_config.patch_size;
+        assert!(image_size % patch_size == 0); // panics unless patch_size evenly divides image_size
+        let npatches = image_size / patch_size;
+        let (num_patch_height, num_patch_width) =
+            get_anyres_image_grid_shape(height, width, &self.image_grid_pinpoints, image_size);
+        // Ceiling division: ceil(height * npatches / width)
+        let height_of_patch = (height * npatches + width - 1) / width;
+        let unpadded_features = npatches * height_of_patch * num_patch_height * num_patch_width;
+        // They are only added after width
+        let newline_features = height_of_patch * num_patch_width;
+        // The base patch covers the entire image
+        let base_features = npatches.pow(2);
+        unpadded_features + newline_features + base_features
+    }
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+#[serde(tag = "model_type")]
+#[serde(rename_all = "snake_case")]
+pub struct ClipVisionModel {
+    image_size: usize,
+    patch_size: usize,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+#[serde(tag = "model_type")]
+#[serde(rename_all = "snake_case")]
+pub enum Config {
+    LlavaNext(LlavaNext),
+    ClipVisionModel(ClipVisionModel),
+    Mistral,
+    Idefics,
+    Ssm,
+    GptBigcode,
+    Santacoder,
+    Bloom,
+    Mpt,
+    GptNeox,
+    Phi,
+    #[serde(rename = "phi-msft")]
+    PhiMsft,
+    Llama,
+    Baichuan,
+    Gemma,
+    Cohere,
+    Drbx,
+    Falcon,
+    Mixtral,
+    Starcoder2,
+    Qwen2,
+    Opt,
+    T5,
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub struct TextConfig {}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub struct VisionConfig {
+    image_size: usize,
+    patch_size: usize,
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_llava_next_features() {
+        let config = LlavaNext {
+            text_config: TextConfig {},
+            vision_config: VisionConfig {
+                image_size: 336,
+                patch_size: 14,
+            },
+            image_grid_pinpoints: vec![
+                (336, 672),
+                (672, 336),
+                (672, 672),
+                (1008, 336),
+                (336, 1008),
+            ],
+        };
+
+        let slots = config.get_number_of_features(640, 640);
+        assert_eq!(slots, 2928);
+        let slots = config.get_number_of_features(480, 640);
+        assert_eq!(slots, 2340);
+        let slots = config.get_number_of_features(899, 1024);
+        assert_eq!(slots, 2732);
+        let slots = config.get_number_of_features(1024, 899);
+        assert_eq!(slots, 3320);
+    }
+}
diff --git a/router/src/health.rs b/router/src/health.rs
index ab290fc1..b05b3094 100644
--- a/router/src/health.rs
+++ b/router/src/health.rs
@@ -1,5 +1,6 @@
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::Arc;
+use text_generation_client::GrammarType as ProtoGrammarType;
 use text_generation_client::{
     Batch, NextTokenChooserParameters, Request, ShardedClient, StoppingCriteriaParameters,
 };
@@ -43,7 +44,10 @@ impl Health {
                     do_sample: false,
                     seed: 0,
                     repetition_penalty: 1.0,
+                    frequency_penalty: 0.0,
                     watermark: false,
+                    grammar: String::new(),
+                    grammar_type: ProtoGrammarType::None as i32,
                 }),
                 stopping_parameters: Some(StoppingCriteriaParameters {
                     max_new_tokens: 1,
diff --git a/router/src/infer.rs b/router/src/infer.rs
index f108a1dc..388c35bc 100644
--- a/router/src/infer.rs
+++ b/router/src/infer.rs
@@ -2,20 +2,23 @@
 
 /// Batching and inference logic
 use crate::validation::{Validation, ValidationError};
-use crate::{Entry, Queue, Token};
-use crate::{GenerateRequest, PrefillToken};
+use crate::{
+    ChatTemplateInputs, ChatTemplateVersions, Entry, GenerateRequest, GenerateStreamResponse,
+    HubTokenizerConfig, Message, PrefillToken, Queue, Token,
+};
 use futures::future::try_join_all;
+use minijinja::{Environment, ErrorKind, Template};
 use nohash_hasher::IntMap;
 use std::sync::{
     atomic::{AtomicBool, Ordering},
     Arc,
 };
 use text_generation_client::{
-    Batch, CachedBatch, ClientError, GeneratedText, Generation, PrefillTokens, ShardedClient,
+    Batch, CachedBatch, ClientError, GeneratedText, Generation, ShardedClient, Tokens,
 };
 use thiserror::Error;
 use tokio::sync::mpsc::error::SendError;
-use tokio::sync::{mpsc, Notify, OwnedSemaphorePermit, Semaphore, TryAcquireError};
+use tokio::sync::{mpsc, Notify, Semaphore, TryAcquireError};
 use tokio::time::Instant;
 use tokio_stream::wrappers::UnboundedReceiverStream;
 use tokio_stream::StreamExt;
@@ -30,6 +33,8 @@ pub struct Infer {
     queue: Queue,
     /// Shared state
     shared: Arc<Shared>,
+    /// Chat template
+    chat_template: Option<ChatTemplate>,
     /// Inference limit
     limit_concurrent_requests: Arc<Semaphore>,
 }
@@ -40,6 +45,11 @@ struct Shared {
     batching_task: Notify,
 }
 
+/// Custom `raise_exception` function exposed to chat templates: always fails with `err_text`
+fn raise_exception(err_text: String) -> Result<String, minijinja::Error> {
+    Err(minijinja::Error::new(ErrorKind::SyntaxError, err_text))
+}
+
 impl Infer {
     #[allow(clippy::too_many_arguments)]
     pub(crate) fn new(
@@ -49,12 +59,15 @@ impl Infer {
         max_batch_prefill_tokens: u32,
         max_batch_total_tokens: u32,
         max_waiting_tokens: usize,
+        max_batch_size: Option<usize>,
         max_concurrent_requests: usize,
         requires_padding: bool,
         max_input_length: u32,
         max_total_tokens: u32,
         window_size: Option<u32>,
+        speculate: u32,
         generation_health: Arc<AtomicBool>,
+        tokenizer_config: HubTokenizerConfig,
     ) -> Self {
         // Infer shared state
         let queue = Queue::new(
@@ -62,7 +75,8 @@ impl Infer {
             max_input_length,
             max_total_tokens,
             16,
-            window_size
+            window_size,
+            speculate
         );
         let shared = Arc::new(Shared {
             batching_task: Notify::new(),
@@ -75,11 +89,27 @@ impl Infer {
             max_batch_prefill_tokens,
             max_batch_total_tokens,
             max_waiting_tokens,
+            max_batch_size,
             queue.clone(),
             shared.clone(),
             generation_health,
         ));
 
+        let chat_template = tokenizer_config
+            .chat_template
+            .and_then(|t| match t {
+                ChatTemplateVersions::Single(template) => Some(template),
+                ChatTemplateVersions::Multiple(templates) => templates
+                    .into_iter()
+                    .find(|t| t.name == "default")
+                    .map(|t| t.template),
+            })
+            .map(|t| {
+                // .strip() is not supported in minijinja
+                let t = t.replace(".strip()", " | trim");
+                ChatTemplate::new(t, tokenizer_config.bos_token, tokenizer_config.eos_token)
+            });
+
         // Inference limit with a semaphore
         let semaphore = Arc::new(Semaphore::new(max_concurrent_requests));
 
@@ -87,6 +117,7 @@ impl Infer {
             validation,
             queue,
             shared,
+            chat_template,
             limit_concurrent_requests: semaphore,
         }
     }
@@ -96,13 +127,7 @@ impl Infer {
     pub(crate) async fn generate_stream(
         &self,
         request: GenerateRequest,
-    ) -> Result<
-        (
-            OwnedSemaphorePermit,
-            UnboundedReceiverStream<Result<InferStreamResponse, InferError>>,
-        ),
-        InferError,
-    > {
+    ) -> Result<GenerateStreamResponse, InferError> {
         // Limit concurrent requests by acquiring a permit from the semaphore
         let permit = self
             .clone()
@@ -123,6 +148,7 @@ impl Infer {
 
         // MPSC channel to communicate with the background batching task
         let (response_tx, response_rx) = mpsc::unbounded_channel();
+        let input_length = valid_request.input_length;
 
         // Append the request to the queue
         self.queue.append(Entry {
@@ -139,7 +165,47 @@ impl Infer {
         self.shared.batching_task.notify_one();
 
         // Return stream
-        Ok((permit, UnboundedReceiverStream::new(response_rx)))
+        Ok((
+            permit,
+            input_length,
+            UnboundedReceiverStream::new(response_rx),
+        ))
+    }
+
+    /// Tokenize the request inputs and return the resulting encoding, if any
+    #[instrument(skip_all)]
+    pub(crate) async fn tokenize(
+        &self,
+        request: GenerateRequest,
+    ) -> Result<Option<tokenizers::Encoding>, InferError> {
+        // Tokenize request
+        let inputs = request.inputs;
+        let truncate = request.parameters.truncate;
+        let encoding = self
+            .validation
+            .tokenize(inputs, truncate)
+            .await
+            .map_err(|err| {
+                tracing::error!("Tokenization {err}");
+                err
+            })?;
+
+        // Keep only the encoding (first tuple element) when tokenization produced one
+        Ok(encoding.map(|(encoding, _)| encoding))
+    }
+
+    /// Render `messages` with the chat template; fails with `TemplateNotFound` if none is set
+    #[instrument(skip_all)]
+    pub(crate) fn apply_chat_template(&self, messages: Vec<Message>) -> Result<String, InferError> {
+        self.chat_template
+            .as_ref()
+            .ok_or_else(|| InferError::TemplateError(ErrorKind::TemplateNotFound.into()))?
+            .apply(messages)
+            .map_err(|e| {
+                metrics::increment_counter!("tgi_request_failure", "err" => "template");
+                tracing::error!("{e}");
+                e
+            })
     }
 
     /// Add a new request to the queue and return a InferResponse
@@ -151,7 +217,7 @@ impl Infer {
         let use_top_tokens = request.parameters.top_n_tokens.is_some_and(|x| x > 0);
 
         // Create stream and keep semaphore permit as long as generate lives
-        let (_permit, mut stream) = self.generate_stream(request).await?;
+        let (_permit, _input_length, mut stream) = self.generate_stream(request).await?;
 
         // Return values
         let mut result_prefill = Vec::new();
@@ -205,6 +271,7 @@ impl Infer {
         {
             Ok(InferResponse {
                 prefill: result_prefill,
+                _input_length,
                 tokens: result_tokens,
                 generated_text,
                 queued,
@@ -261,6 +328,42 @@ impl Infer {
     }
 }
 
+#[derive(Clone)]
+struct ChatTemplate {
+    template: Template<'static, 'static>,
+    bos_token: Option<String>,
+    eos_token: Option<String>,
+}
+
+impl ChatTemplate {
+    fn new(template: String, bos_token: Option<String>, eos_token: Option<String>) -> Self {
+        let mut env = Box::new(Environment::new());
+        let template_str = template.into_boxed_str();
+        env.add_function("raise_exception", raise_exception);
+        // Leak env and template_str on purpose: the compiled template borrows both as 'static.
+        let template = Box::leak(env)
+            .template_from_str(Box::leak(template_str))
+            .unwrap();
+
+        Self {
+            template,
+            bos_token,
+            eos_token,
+        }
+    }
+
+    fn apply(&self, messages: Vec<Message>) -> Result<String, InferError> {
+        self.template
+            .render(ChatTemplateInputs {
+                messages,
+                bos_token: self.bos_token.as_deref(),
+                eos_token: self.eos_token.as_deref(),
+                add_generation_prompt: true,
+            })
+            .map_err(InferError::TemplateError)
+    }
+}
+
 /// Batching logic
 /// Will be launched in a background Tokio task
 ///
@@ -272,6 +375,7 @@ async fn batching_task(
     max_batch_prefill_tokens: u32,
     max_batch_total_tokens: u32,
     max_waiting_tokens: usize,
+    max_batch_size: Option<usize>,
     queue: Queue,
     shared: Arc<Shared>,
     generation_health: Arc<AtomicBool>,
@@ -285,7 +389,12 @@ async fn batching_task(
         // This batch might be smaller than the maximum batch size if there are not enough requests
         // waiting in the queue
         while let Some((mut entries, batch, span)) = queue
-            .next_batch(None, max_batch_prefill_tokens, max_batch_total_tokens)
+            .next_batch(
+                None,
+                max_batch_size,
+                max_batch_prefill_tokens,
+                max_batch_total_tokens,
+            )
             .await
         {
             let mut cached_batch = prefill(&mut client, batch, &mut entries, &generation_health)
@@ -313,10 +422,11 @@ async fn batching_task(
                 };
 
                 let token_budget = max_batch_total_tokens.saturating_sub(batch_max_tokens);
+                let max_size = max_batch_size.map(|max| max.saturating_sub(batch_size as usize));
 
                 // Try to get a new batch
                 if let Some((mut new_entries, new_batch, span)) = queue
-                    .next_batch(min_size, max_batch_prefill_tokens, token_budget)
+                    .next_batch(min_size, max_size, max_batch_prefill_tokens, token_budget)
                     .await
                 {
                     // Tracking metrics
@@ -388,15 +498,20 @@ async fn prefill(
     metrics::increment_counter!("tgi_batch_inference_count", "method" => "prefill");
 
     match client.prefill(batch).await {
-        Ok((generations, next_batch)) => {
+        Ok((generations, next_batch, timings)) => {
             // Update health
             generation_health.store(true, Ordering::SeqCst);
+
+            let start_filtering_time = Instant::now();
             // Send generated tokens and filter stopped entries
             filter_send_generations(generations, entries);
 
             // Filter next batch and remove requests that were stopped
             let next_batch = filter_batch(client, next_batch, entries).await;
 
+            metrics::histogram!("tgi_batch_forward_duration", timings.forward.as_secs_f64(), "method" => "prefill");
+            metrics::histogram!("tgi_batch_decode_duration", timings.decode.as_secs_f64(), "method" => "prefill");
+            metrics::histogram!("tgi_batch_filter_duration", start_filtering_time.elapsed().as_secs_f64(), "method" => "prefill");
             metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed().as_secs_f64(), "method" => "prefill");
             metrics::increment_counter!("tgi_batch_inference_success", "method" => "prefill");
             next_batch
@@ -425,15 +540,23 @@ async fn decode(
     metrics::increment_counter!("tgi_batch_inference_count", "method" => "decode");
 
     match client.decode(batches).await {
-        Ok((generations, next_batch)) => {
+        Ok((generations, next_batch, timings)) => {
             // Update health
             generation_health.store(true, Ordering::SeqCst);
+
+            let start_filtering_time = Instant::now();
             // Send generated tokens and filter stopped entries
             filter_send_generations(generations, entries);
 
             // Filter next batch and remove requests that were stopped
             let next_batch = filter_batch(client, next_batch, entries).await;
 
+            if let Some(concat_duration) = timings.concat {
+                metrics::histogram!("tgi_batch_concat_duration", concat_duration.as_secs_f64(), "method" => "decode");
+            }
+            metrics::histogram!("tgi_batch_forward_duration", timings.forward.as_secs_f64(), "method" => "decode");
+            metrics::histogram!("tgi_batch_decode_duration", timings.decode.as_secs_f64(), "method" => "decode");
+            metrics::histogram!("tgi_batch_filter_duration", start_filtering_time.elapsed().as_secs_f64(), "method" => "decode");
             metrics::histogram!("tgi_batch_inference_duration", start_time.elapsed().as_secs_f64(), "method" => "decode");
             metrics::increment_counter!("tgi_batch_inference_success", "method" => "decode");
             next_batch
@@ -533,50 +656,63 @@ fn send_responses(
     }
 
     // Create last Token
-    let token = Token {
-        id: generation.token_id,
-        text: generation.token_text,
-        logprob: generation.token_logprob,
-        special: generation.token_is_special,
-    };
-
-    // generation.top_tokens
-
-    let mut top_tokens = Vec::new();
-    if let Some(top_tokens_) = generation.top_tokens {
-        top_tokens.extend(
+    let tokens_ = generation.tokens.expect("Non empty tokens in generation");
+    let n = tokens_.ids.len();
+    metrics::histogram!("tgi_request_skipped_tokens", n.saturating_sub(1) as f64);
+    let mut iterator = tokens_
+        .ids
+        .into_iter()
+        .zip(tokens_.logprobs)
+        .zip(tokens_.texts)
+        .zip(tokens_.is_special)
+        .enumerate()
+        .peekable();
+    while let Some((i, (((id, logprob), text), special))) = iterator.next() {
+        let token = Token {
+            id,
+            text,
+            logprob,
+            special,
+        };
+        let top_tokens = if let Some(top_tokens_) = generation.top_tokens.get(i) {
             top_tokens_
                 .ids
-                .into_iter()
-                .zip(top_tokens_.logprobs.into_iter())
-                .zip(top_tokens_.texts.into_iter())
-                .zip(top_tokens_.is_special.into_iter())
-                .map(|(((id, logprob), text), special)| Token {
+                .iter()
+                .zip(top_tokens_.logprobs.iter())
+                .zip(top_tokens_.texts.iter())
+                .zip(top_tokens_.is_special.iter())
+                .map(|(((&id, &logprob), text), &special)| Token {
                     id,
-                    text,
+                    text: text.to_string(),
                     logprob,
                     special,
-                }),
-        )
+                })
+                .collect()
+        } else {
+            vec![]
+        };
+        match (&generation.generated_text, iterator.peek()) {
+            (Some(generated_text), None) => {
+                // Generation has ended
+                stopped = true;
+                // Send message
+                entry.response_tx.send(Ok(InferStreamResponse::End {
+                    token,
+                    top_tokens,
+                    generated_text: generated_text.clone(),
+                    queued: entry.queue_time,
+                    start: entry.batch_time.unwrap(),
+                }))?;
+            }
+            _ => {
+                // Send message
+                entry
+                    .response_tx
+                    .send(Ok(InferStreamResponse::Intermediate { token, top_tokens }))?;
+            }
+        }
     }
 
-    if let Some(generated_text) = generation.generated_text {
-        // Generation has ended
-        stopped = true;
-        // Send message
-        entry.response_tx.send(Ok(InferStreamResponse::End {
-            token,
-            top_tokens,
-            generated_text,
-            queued: entry.queue_time,
-            start: entry.batch_time.unwrap(),
-        }))?;
-    } else {
-        // Send message
-        entry
-            .response_tx
-            .send(Ok(InferStreamResponse::Intermediate { token, top_tokens }))?;
-    }
     Ok(stopped)
 }
 
@@ -601,7 +737,7 @@ fn send_errors(error: ClientError, entries: &mut IntMap<u64, Entry>) {
 #[derive(Debug)]
 pub(crate) enum InferStreamResponse {
     // Optional first message
-    Prefill(PrefillTokens),
+    Prefill(Tokens),
     // Intermediate messages
     Intermediate {
         token: Token,
@@ -619,6 +755,10 @@ pub(crate) enum InferStreamResponse {
 
 #[derive(Debug)]
 pub(crate) struct InferResponse {
+    /// input_length is the input as perceived by the rust tokenizer in the
+    /// validation pathway. It is redundant with prefill.len() but prefill
+    /// has data only if the user asked for it. This will always be filled.
+    pub(crate) _input_length: u32,
     pub(crate) prefill: Vec<PrefillToken>,
     pub(crate) tokens: Vec<Token>,
     pub(crate) generated_text: GeneratedText,
@@ -637,6 +777,8 @@ pub enum InferError {
     ValidationError(#[from] ValidationError),
     #[error("Incomplete generation")]
     IncompleteGeneration,
+    #[error("Template error: {0}")]
+    TemplateError(#[from] minijinja::Error),
 }
 
 impl InferError {
@@ -646,6 +788,705 @@ impl InferError {
             InferError::Overloaded(_) => "overloaded",
             InferError::ValidationError(_) => "validation",
             InferError::IncompleteGeneration => "incomplete_generation",
+            InferError::TemplateError(_) => "template_error",
+        }
+    }
+}
+
+// tests
+#[cfg(test)]
+mod tests {
+    use crate::infer::raise_exception;
+    use crate::ChatTemplateInputs;
+    use crate::Message;
+    use minijinja::Environment;
+
+    #[test]
+    fn test_chat_template() {
+        let env = Environment::new();
+
+        let source = r#"
+        {% for message in messages %}
+            {% if message['role'] == 'system' %}
+                {% if message['content']%}
+                    {{'### System:\n' + message['content']+'\n\n'}}
+                {% endif %}
+            {% elif message['role'] == 'user' %}
+                {{'### User:\n' + message['content']+'\n\n'}}
+            {% elif message['role'] == 'assistant' %}
+                {{'### Assistant:\n'  + message['content']}}
+            {% endif %}
+            {% if loop.last and add_generation_prompt %}
+                {{ '### Assistant:\n' }}
+            {% endif %}
+        {% endfor %}"#;
+
+        // trim all the whitespace
+        let source = source
+            .lines()
+            .map(|line| line.trim())
+            .collect::<Vec<&str>>()
+            .join("");
+
+        let tmpl = env.template_from_str(&source);
+
+        let chat_template_inputs = ChatTemplateInputs {
+            messages: vec![
+                Message {
+                    role: "user".to_string(),
+                    content: Some("Hi!".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "assistant".to_string(),
+                    content: Some("Hello how can I help?".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "user".to_string(),
+                    content: Some("What is Deep Learning?".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "assistant".to_string(),
+                    content: Some("magic!".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+            ],
+            bos_token: Some("[BOS]"),
+            eos_token: Some("[EOS]"),
+            add_generation_prompt: true,
+        };
+
+        let result = tmpl.unwrap().render(chat_template_inputs).unwrap();
+
+        assert_eq!(
+            result,
+            "### User:\nHi!\n\n### Assistant:\nHello how can I help?### User:\nWhat is Deep Learning?\n\n### Assistant:\nmagic!### Assistant:\n"
+        );
+    }
+
+    #[test]
+    fn test_chat_template_invalid_with_raise() {
+        let mut env = Environment::new();
+        env.add_function("raise_exception", raise_exception);
+
+        let source = r#"
+        {{ bos_token }}
+        {% for message in messages %}
+        {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
+        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
+        {% endif %}
+        {% if message['role'] == 'user' %}
+        {{ '[INST] ' + message['content'] + ' [/INST]' }}
+        {% elif message['role'] == 'assistant' %}
+        {{ message['content'] + eos_token}}
+        {% else %}
+        {{ raise_exception('Only user and assistant roles are supported!') }}
+        {% endif %}
+        {% endfor %}"#;
+
+        // trim all the whitespace
+        let source = source
+            .lines()
+            .map(|line| line.trim())
+            .collect::<Vec<&str>>()
+            .join("");
+
+        let tmpl = env.template_from_str(&source);
+
+        let chat_template_inputs = ChatTemplateInputs {
+            messages: vec![
+                Message {
+                    role: "user".to_string(),
+                    content: Some("Hi!".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "user".to_string(),
+                    content: Some("Hi again!".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "assistant".to_string(),
+                    content: Some("Hello how can I help?".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "user".to_string(),
+                    content: Some("What is Deep Learning?".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "assistant".to_string(),
+                    content: Some("magic!".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+            ],
+            bos_token: Some("[BOS]"),
+            eos_token: Some("[EOS]"),
+            add_generation_prompt: true,
+        };
+
+        let result = tmpl.unwrap().render(chat_template_inputs); //.err().unwrap();
+
+        match result {
+            Ok(_) => panic!("Should have failed"),
+            Err(e) => {
+                assert_eq!(
+                    e.detail().unwrap(),
+                    "Conversation roles must alternate user/assistant/user/assistant/..."
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_chat_template_valid_with_raise() {
+        let mut env = Environment::new();
+        env.add_function("raise_exception", raise_exception);
+
+        let source = r#"
+        {{ bos_token }}
+        {% for message in messages %}
+        {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
+        {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}
+        {% endif %}
+        {% if message['role'] == 'user' %}
+        {{ '[INST] ' + message['content'] + ' [/INST]' }}
+        {% elif message['role'] == 'assistant' %}
+        {{ message['content'] + eos_token}}
+        {% else %}
+        {{ raise_exception('Only user and assistant roles are supported!') }}
+        {% endif %}
+        {% endfor %}"#;
+
+        // trim all the whitespace
+        let source = source
+            .lines()
+            .map(|line| line.trim())
+            .collect::<Vec<&str>>()
+            .join("");
+
+        let tmpl = env.template_from_str(&source);
+
+        let chat_template_inputs = ChatTemplateInputs {
+            messages: vec![
+                Message {
+                    role: "user".to_string(),
+                    content: Some("Hi!".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "assistant".to_string(),
+                    content: Some("Hello how can I help?".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "user".to_string(),
+                    content: Some("What is Deep Learning?".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "assistant".to_string(),
+                    content: Some("magic!".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+            ],
+            bos_token: Some("[BOS]"),
+            eos_token: Some("[EOS]"),
+            add_generation_prompt: true,
+        };
+
+        let result = tmpl.unwrap().render(chat_template_inputs).unwrap();
+        assert_eq!(result, "[BOS][INST] Hi! [/INST]Hello how can I help?[EOS][INST] What is Deep Learning? [/INST]magic![EOS]");
+    }
+
+    #[test]
+    fn test_chat_template_valid_with_add_generation_prompt() {
+        let mut env = Environment::new();
+        env.add_function("raise_exception", raise_exception);
+
+        let source = r#"
+        {% for message in messages %}
+        {{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}
+        {% endfor %}
+        {% if add_generation_prompt %}
+            {{ '<|im_start|>assistant\n' }}
+        {% endif %}"#;
+
+        // trim all the whitespace
+        let source = source
+            .lines()
+            .map(|line| line.trim())
+            .collect::<Vec<&str>>()
+            .join("");
+
+        let tmpl = env.template_from_str(&source);
+
+        let chat_template_inputs = ChatTemplateInputs {
+            messages: vec![
+                Message {
+                    role: "user".to_string(),
+                    content: Some("Hi!".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "assistant".to_string(),
+                    content: Some("Hello how can I help?".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "user".to_string(),
+                    content: Some("What is Deep Learning?".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+                Message {
+                    role: "assistant".to_string(),
+                    content: Some("magic!".to_string()),
+                    name: None,
+                    tool_calls: None,
+                },
+            ],
+            bos_token: Some("[BOS]"),
+            eos_token: Some("[EOS]"),
+            add_generation_prompt: true,
+        };
+
+        let result = tmpl.unwrap().render(chat_template_inputs).unwrap();
+        assert_eq!(result, "<|im_start|>user\nHi!<|im_end|>\n<|im_start|>assistant\nHello how can I help?<|im_end|>\n<|im_start|>user\nWhat is Deep Learning?<|im_end|>\n<|im_start|>assistant\nmagic!<|im_end|>\n<|im_start|>assistant\n");
+    }
+
+    struct ChatTemplateTestItem {
+        name: &'static str,
+        chat_template: &'static str,
+        input: ChatTemplateInputs<'static>,
+        target: &'static str,
+    }
+
+    #[test]
+    fn test_many_chat_templates() {
+        let example_chat = vec![
+            Message {
+                role: "user".to_string(),
+                content: Some("Hello, how are you?".to_string()),
+                name: None,
+                tool_calls: None,
+            },
+            Message {
+                role: "assistant".to_string(),
+                content: Some("I'm doing great. How can I help you today?".to_string()),
+                name: None,
+                tool_calls: None,
+            },
+            Message {
+                role: "user".to_string(),
+                content: Some("I'd like to show off how chat templating works!".to_string()),
+                name: None,
+                tool_calls: None,
+            },
+        ];
+
+        let example_chat_with_system = vec![Message {
+            role: "system".to_string(),
+            content: Some(
+                "You are a friendly chatbot who always responds in the style of a pirate"
+                    .to_string(),
+            ),
+            name: None,
+            tool_calls: None,
+        }]
+        .iter()
+        .chain(&example_chat)
+        .cloned()
+        .collect::<Vec<_>>();
+
+        let test_default_templates = vec![
+            ChatTemplateTestItem {
+                name: "_base",
+                chat_template: "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some(""),
+                    eos_token: Some(""),
+                },
+                target: "<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
+            },
+            ChatTemplateTestItem {
+                name: "blenderbot",
+                chat_template: "{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ '  ' }}{% endif %}{% endfor %}{{ eos_token }}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some(""),
+                    eos_token: Some("</s>"),
+                },
+                target: " Hello, how are you?  I'm doing great. How can I help you today?   I'd like to show off how chat templating works!</s>",
+            },
+            ChatTemplateTestItem {
+                name: "blenderbot_small",
+                chat_template: "{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ '  ' }}{% endif %}{% endfor %}{{ eos_token }}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some(""),
+                    eos_token: Some("</s>"),
+                },
+                target: " Hello, how are you?  I'm doing great. How can I help you today?   I'd like to show off how chat templating works!</s>",
+            },
+            ChatTemplateTestItem {
+                name: "bloom",
+                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some(""),
+                    eos_token: Some("</s>"),
+                },
+                target: "Hello, how are you?</s>I'm doing great. How can I help you today?</s>I'd like to show off how chat templating works!</s>",
+            },
+            ChatTemplateTestItem {
+                name: "gpt_neox",
+                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some(""),
+                    eos_token: Some("<|endoftext|>"),
+                },
+                target: "Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>",
+            },
+            ChatTemplateTestItem {
+                name: "gpt2",
+                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some(""),
+                    eos_token: Some("<|endoftext|>"),
+                },
+                target: "Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>",
+            },
+            ChatTemplateTestItem {
+                name: "llama",
+                // NOTE: the `.strip()` has been replaced with `| trim` in the following template
+                chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif USE_DEFAULT_PROMPT == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'DEFAULT_SYSTEM_MESSAGE' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ bos_token +'[INST] ' + content | trim + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + content | trim + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content | trim + ' ' + eos_token }}{% endif %}{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat_with_system.clone(),
+                    add_generation_prompt: true,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "<s>[INST] <<SYS>>\nYou are a friendly chatbot who always responds in the style of a pirate\n<</SYS>>\n\nHello, how are you? [/INST] I'm doing great. How can I help you today? </s><s>[INST] I'd like to show off how chat templating works! [/INST]",
+            },
+            ChatTemplateTestItem {
+                name: "whisper",
+                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: true,
+                    bos_token: Some(""),
+                    eos_token: Some("<|endoftext|>"),
+                },
+                target: "Hello, how are you?<|endoftext|>I'm doing great. How can I help you today?<|endoftext|>I'd like to show off how chat templating works!<|endoftext|>",
+            },
+        ];
+
+        #[allow(unused_variables)] // name is unused
+        for ChatTemplateTestItem {
+            name,
+            chat_template,
+            input,
+            target,
+        } in test_default_templates
+        {
+            let mut env = Environment::new();
+            env.add_function("raise_exception", raise_exception);
+            let tmpl = env.template_from_str(&chat_template);
+            let result = tmpl.unwrap().render(input).unwrap();
+            assert_eq!(result, target);
+        }
+
+        let test_custom_templates = vec![
+            ChatTemplateTestItem {
+                name: "HuggingFaceH4/zephyr-7b-beta (add_generation_prompt=false)",
+                chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat_with_system.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some(""),
+                    eos_token: Some("</s>"),
+                },
+                target: "<|system|>\nYou are a friendly chatbot who always responds in the style of a pirate</s><|user|>\nHello, how are you?</s><|assistant|>\nI'm doing great. How can I help you today?</s><|user|>\nI'd like to show off how chat templating works!</s>",
+            },
+            ChatTemplateTestItem {
+                name: "HuggingFaceH4/zephyr-7b-beta (add_generation_prompt=true)",
+                chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: vec![
+                        Message {
+                            role: "system".to_string(),
+                            content: Some("You are a friendly chatbot who always responds in the style of a pirate".to_string()),
+                            name: None,
+                            tool_calls: None,
+                        },
+                        Message {
+                            role: "user".to_string(),
+                            content: Some("How many helicopters can a human eat in one sitting?".to_string()),
+                            name: None,
+                            tool_calls: None,
+                        },
+                    ],
+                    add_generation_prompt: true,
+                    bos_token: Some(""),
+                    eos_token: Some("</s>"),
+                },
+                target: "<|system|>\nYou are a friendly chatbot who always responds in the style of a pirate</s><|user|>\nHow many helicopters can a human eat in one sitting?</s><|assistant|>",
+            },
+            ChatTemplateTestItem {
+                name: "HuggingFaceH4/zephyr-7b-gemma-v0.1",
+                chat_template: "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<bos>"),
+                    eos_token: Some("<eos>"),
+                },
+                target: "<bos><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
+            },
+            ChatTemplateTestItem {
+                name: "mistralai/Mistral-7B-Instruct-v0.1",
+                chat_template: "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s> [INST] I'd like to show off how chat templating works! [/INST]",
+            },
+            ChatTemplateTestItem {
+                name: "mistralai/Mixtral-8x7B-Instruct-v0.1",
+                chat_template: "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "<s>[INST] Hello, how are you? [/INST]I'm doing great. How can I help you today?</s>[INST] I'd like to show off how chat templating works! [/INST]",
+            },
+            ChatTemplateTestItem {
+                name: "cognitivecomputations/dolphin-2.5-mixtral-8x7b",
+                chat_template: "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
+            },
+            ChatTemplateTestItem {
+                name: "openchat/openchat-3.5-0106",
+                // NOTE: `.title()` has been replaced with `| title` in the following template
+                chat_template: "{{ bos_token }}{% for message in messages %}{{ 'GPT4 Correct ' + (message['role'] | title) + ': ' + message['content'] + '<|end_of_turn|>'}}{% endfor %}{% if add_generation_prompt %}{{ 'GPT4 Correct Assistant:' }}{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "<s>GPT4 Correct User: Hello, how are you?<|end_of_turn|>GPT4 Correct Assistant: I'm doing great. How can I help you today?<|end_of_turn|>GPT4 Correct User: I'd like to show off how chat templating works!<|end_of_turn|>",
+            },
+            ChatTemplateTestItem {
+                name: "upstage/SOLAR-10.7B-Instruct-v1.0",
+                chat_template: "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "Hello, how are you?</s>I'm doing great. How can I help you today?</s>I'd like to show off how chat templating works!</s>",
+            },
+            ChatTemplateTestItem {
+                name: "codellama/CodeLlama-70b-Instruct-hf",
+                // NOTE: `.strip()` has been replaced with `| trim` in the following template
+                chat_template: "{% if messages[0]['role'] == 'system' %}{% set user_index = 1 %}{% else %}{% set user_index = 0 %}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != ((loop.index0 + user_index) % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 %}{{ '<s>' }}{% endif %}{% set content = 'Source: ' + message['role'] + '\\n\\n ' + message['content'] | trim %}{{ content + ' <step> ' }}{% endfor %}{{'Source: assistant\\nDestination: user\\n\\n '}}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "<s>Source: user\n\n Hello, how are you? <step> Source: assistant\n\n I'm doing great. How can I help you today? <step> Source: user\n\n I'd like to show off how chat templating works! <step> Source: assistant\nDestination: user\n\n ",
+            },
+            ChatTemplateTestItem {
+                name: "Deci/DeciLM-7B-instruct",
+                chat_template: "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '### User:\\n' + message['content'] }}\n{% elif message['role'] == 'system' %}\n{{ '### System:\\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '### Assistant:\\n'  + message['content'] }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '### Assistant:' }}\n{% endif %}\n{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "### User:\nHello, how are you?### Assistant:\nI'm doing great. How can I help you today?### User:\nI'd like to show off how chat templating works!",
+            },
+            ChatTemplateTestItem {
+                name: "Qwen/Qwen1.5-72B-Chat",
+                chat_template: "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\\nYou are a helpful assistant<|im_end|>\\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\\n' + message['content']}}{% if (loop.last and add_generation_prompt) or not loop.last %}{{ '<|im_end|>' + '\\n'}}{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "<|im_start|>system\nYou are a helpful assistant<|im_end|>\n<|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!",
+            },
+            ChatTemplateTestItem {
+                name: "deepseek-ai/deepseek-llm-7b-chat",
+                chat_template: "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{{ bos_token }}{% for message in messages %}{% if message['role'] == 'user' %}{{ 'User: ' + message['content'] + '\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ 'Assistant: ' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ message['content'] + '\\n\\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<｜begin▁of▁sentence｜>"),
+                    eos_token: Some("<｜end▁of▁sentence｜>"),
+                },
+                target: "<｜begin▁of▁sentence｜>User: Hello, how are you?\n\nAssistant: I'm doing great. How can I help you today?<｜end▁of▁sentence｜>User: I'd like to show off how chat templating works!\n\n",
+            },
+            ChatTemplateTestItem {
+                name: "h2oai/h2o-danube-1.8b-chat",
+                chat_template: "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|prompt|>' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ '<|system|>' + message['content'] + eos_token }}{% elif message['role'] == 'assistant' %}{{ '<|answer|>'  + message['content'] + eos_token }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|answer|>' }}{% endif %}{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "<|prompt|>Hello, how are you?</s><|answer|>I'm doing great. How can I help you today?</s><|prompt|>I'd like to show off how chat templating works!</s>",
+            },
+            ChatTemplateTestItem {
+                name: "internlm/internlm2-chat-7b",
+                chat_template: "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>' + '\\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "<s><|im_start|>user\nHello, how are you?<|im_end|>\n<|im_start|>assistant\nI'm doing great. How can I help you today?<|im_end|>\n<|im_start|>user\nI'd like to show off how chat templating works!<|im_end|>\n",
+            },
+            ChatTemplateTestItem {
+                name: "TheBloke/deepseek-coder-33B-instruct-AWQ",
+                chat_template: "{%- set found_item = false -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set found_item = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{%- if not found_item -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{{'### Response:\\n'}}\n",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<｜begin▁of▁sentence｜>"),
+                    eos_token: Some("<|EOT|>"),
+                },
+                target: "You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\n### Instruction:\nHello, how are you?\n### Response:\nI'm doing great. How can I help you today?\n<|EOT|>\n### Instruction:\nI'd like to show off how chat templating works!\n### Response:\n",
+            },
+            ChatTemplateTestItem {
+                name: "ericzzz/falcon-rw-1b-chat",
+                // `.strip()` has been replaced with `| trim` in the following template
+                chat_template: "{% for message in messages %}{% if loop.index > 1 and loop.previtem['role'] != 'assistant' %}{{ ' ' }}{% endif %}{% if message['role'] == 'system' %}{{ '[SYS] ' + message['content'] | trim }}{% elif message['role'] == 'user' %}{{ '[INST] ' + message['content'] | trim }}{% elif message['role'] == 'assistant' %}{{ '[RESP] '  + message['content'] + eos_token }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ ' [RESP] ' }}{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<|endoftext|>"),
+                    eos_token: Some("<|endoftext|>"),
+                },
+                target: "[INST] Hello, how are you? [RESP] I'm doing great. How can I help you today?<|endoftext|>[INST] I'd like to show off how chat templating works!",
+            },
+            ChatTemplateTestItem {
+                name: "abacusai/Smaug-34B-v0.1",
+                chat_template: "{%- for idx in range(0, messages|length) -%}\n{%- if messages[idx]['role'] == 'user' -%}\n{%- if idx > 1 -%}\n{{- bos_token + '[INST] ' + messages[idx]['content'] + ' [/INST]' -}}\n{%- else -%}\n{{- messages[idx]['content'] + ' [/INST]' -}}\n{%- endif -%}\n{% elif messages[idx]['role'] == 'system' %}\n{{- '[INST] <<SYS>>\\n' + messages[idx]['content'] + '\\n<</SYS>>\\n\\n' -}}\n{%- elif messages[idx]['role'] == 'assistant' -%}\n{{- ' '  + messages[idx]['content'] + ' ' + eos_token -}}\n{% endif %}\n{% endfor %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "Hello, how are you? [/INST] I'm doing great. How can I help you today? </s><s>[INST] I'd like to show off how chat templating works! [/INST]",
+            },
+            ChatTemplateTestItem {
+                name: "maywell/Synatra-Mixtral-8x7B",
+                chat_template: "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n{% for message in messages %}{% if message['role'] == 'user' %}### Instruction:\n{{ message['content']|trim -}}{% if not loop.last %}{% endif %}\n{% elif message['role'] == 'assistant' %}### Response:\n{{ message['content']|trim -}}{% if not loop.last %}{% endif %}\n{% elif message['role'] == 'system' %}{{ message['content']|trim -}}{% if not loop.last %}{% endif %}\n{% endif %}\n{% endfor %}\n{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}\n### Response:\n{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "Below is an instruction that describes a task. Write a response that appropriately completes the request.### Instruction:Hello, how are you?### Response:I'm doing great. How can I help you today?### Instruction:I'd like to show off how chat templating works!",
+            },
+            ChatTemplateTestItem {
+                name: "deepseek-ai/deepseek-coder-33b-instruct",
+                chat_template: "{% if not add_generation_prompt is defined %}\n{% set add_generation_prompt = false %}\n{% endif %}\n{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set ns.found = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{{bos_token}}{%- if not ns.found -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{% if add_generation_prompt %}\n{{'### Response:'}}\n{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<｜begin▁of▁sentence｜>"),
+                    eos_token: Some("</EOT>"),
+                },
+                target: "<｜begin▁of▁sentence｜>You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\n### Instruction:\nHello, how are you?\n### Response:\nI'm doing great. How can I help you today?\n<|EOT|>\n### Instruction:\nI'd like to show off how chat templating works!\n",
+            },
+            // NOT INCLUDED
+            // - meetkai/functionary-medium-v2.2
+            // - fireworks-ai/firefunction-v1
+            // https://github
+            ChatTemplateTestItem {
+                name: "maywell/PiVoT-MoE",
+                chat_template: "{{ (messages|selectattr('role', 'equalto', 'system')|list|last).content|trim if (messages|selectattr('role', 'equalto', 'system')|list) else '' }}{% for message in messages %}{% if message['role'] == 'system' %}{{ message['content']|trim }}{% elif message['role'] == 'user' %}### Instruction: {{ message['content']|trim }}{% elif message['role'] == 'assistant' %}### Response: {{ message['content']|trim }}{% elif message['role'] == 'user_context' %}### Input: {{ message['content']|trim }}{% endif %}{% if not loop.last %}\n{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}### Response:{% endif %}",
+                input: ChatTemplateInputs {
+                    messages: example_chat_with_system.clone(),
+                    add_generation_prompt: false,
+                    bos_token: Some("<s>"),
+                    eos_token: Some("</s>"),
+                },
+                target: "You are a friendly chatbot who always responds in the style of a pirateYou are a friendly chatbot who always responds in the style of a pirate### Instruction: Hello, how are you?### Response: I'm doing great. How can I help you today?### Instruction: I'd like to show off how chat templating works!",
+            },
+        ];
+
+        #[allow(unused_variables)] // name is unused
+        for ChatTemplateTestItem {
+            name,
+            chat_template,
+            input,
+            target,
+        } in test_custom_templates
+        {
+            let mut env = Environment::new();
+            env.add_function("raise_exception", raise_exception);
+            // trim all the whitespace
+            let chat_template = chat_template
+                .lines()
+                .map(|line| line.trim())
+                .collect::<Vec<&str>>()
+                .join("");
+
+            let tmpl = env.template_from_str(&chat_template);
+            let result = tmpl.unwrap().render(input).unwrap();
+            assert_eq!(result, target);
         }
     }
 }
diff --git a/router/src/lib.rs b/router/src/lib.rs
index b547dc15..2e412f1a 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -1,3 +1,4 @@
+pub mod config;
 mod health;
 /// Text Generation Inference Webserver
 mod infer;
@@ -5,12 +6,40 @@ mod queue;
 pub mod server;
 mod validation;
 
-use infer::Infer;
+use infer::{Infer, InferError, InferStreamResponse};
 use queue::{Entry, Queue};
 use serde::{Deserialize, Serialize};
+use tokio::sync::OwnedSemaphorePermit;
+use tokio_stream::wrappers::UnboundedReceiverStream;
 use utoipa::ToSchema;
 use validation::Validation;
 
+/// Generation response tuple: semaphore permit, input length, and stream of results or errors
+pub(crate) type GenerateStreamResponse = (
+    OwnedSemaphorePermit,
+    u32, // input_length
+    UnboundedReceiverStream<Result<InferStreamResponse, InferError>>,
+);
+
+#[derive(Clone, Deserialize, ToSchema)] // one element of `VertexRequest::instances`
+pub(crate) struct VertexInstance {
+    #[schema(example = "What is Deep Learning?")]
+    pub inputs: String, // required: no serde default is declared
+    #[schema(nullable = true, default = "null", example = "null")]
+    pub parameters: Option<GenerateParameters>, // optional: a missing or null key becomes `None`
+}
+
+#[derive(Deserialize, ToSchema)] // deserialize-only wrapper around a list of `VertexInstance`
+pub(crate) struct VertexRequest {
+    #[serde(rename = "instances")] // same as the field name, so the JSON key is `instances`
+    pub instances: Vec<VertexInstance>, // required: no serde default is declared
+}
+
+#[derive(Clone, Deserialize, ToSchema, Serialize)] // response wrapper, both serializable and deserializable
+pub(crate) struct VertexResponse {
+    pub predictions: Vec<String>, // presumably one entry per request instance; TODO confirm in the handler
+}
+
 /// Hub type
 #[derive(Clone, Debug, Deserialize)]
 pub struct HubModelInfo {
@@ -20,6 +49,78 @@ pub struct HubModelInfo {
     pub pipeline_tag: Option<String>,
 }
 
+#[derive(Debug, Clone, Deserialize, PartialEq)] // element type of `ChatTemplateVersions::Multiple`
+pub struct ChatTemplate {
+    name: String, // label identifying this template
+    template: String, // template source text; presumably Jinja-style, confirm against the renderer
+}
+
+#[derive(Debug, Clone, Deserialize, PartialEq)]
+#[serde(untagged)] // the variant is picked from the JSON shape, tried in declaration order
+pub enum ChatTemplateVersions {
+    Single(String), // the value is a JSON string
+    Multiple(Vec<ChatTemplate>), // the value is a JSON array of `name`/`template` objects
+}
+
+#[derive(Debug, Clone, Deserialize, Default)] // tokenizer config subset; every field is optional
+pub struct HubTokenizerConfig {
+    pub chat_template: Option<ChatTemplateVersions>,
+    pub completion_template: Option<String>,
+    #[serde(default, deserialize_with = "token_serde::deserialize")] // `default`: missing key is `None`
+    pub bos_token: Option<String>,
+    #[serde(default, deserialize_with = "token_serde::deserialize")] // else serde demands the key
+    pub eos_token: Option<String>,
+}
+
+impl HubTokenizerConfig {
+    /// Reads JSON from `filename`; panics if unreadable, uses `Default` when parsing fails.
+    pub fn from_file(filename: &std::path::Path) -> Self {
+        serde_json::from_str(&std::fs::read_to_string(filename).unwrap()).unwrap_or_default()
+    }
+}
+
+#[derive(Clone, Debug, Deserialize, ToSchema)]
+#[serde(tag = "type", content = "value")] // adjacently tagged: `{"type": ..., "value": ...}`
+pub(crate) enum GrammarType {
+    /// A string that represents a [JSON Schema](https://json-schema.org/).
+    ///
+    /// JSON Schema is a declarative language that allows to annotate JSON documents
+    /// with types and descriptions.
+    #[serde(rename = "json")]
+    #[schema(example = json ! ({"properties": {"location":{"type": "string"}}}))]
+    Json(serde_json::Value), // held as an arbitrary JSON value; nothing is validated here
+    #[serde(rename = "regex")]
+    Regex(String), // held as a plain string; nothing is compiled or checked here
+}
+
+mod token_serde {
+    use super::*;
+    use serde::de;
+    use serde::Deserializer;
+    use serde_json::Value;
+
+    /// Deserializes a token given as a string, as an object with a string `content` key,
+    /// or as `null` (which yields `None`); any other JSON shape is an error.
+    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<String>, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        match Value::deserialize(deserializer)? {
+            Value::String(s) => Ok(Some(s)),
+            Value::Object(map) => {
+                if let Some(content) = map.get("content").and_then(|v| v.as_str()) {
+                    Ok(Some(content.to_string()))
+                } else {
+                    Err(de::Error::custom("content key not found in structured token"))
+                }
+            }
+            // An explicit `null` means "no token"; it must not fail the whole config parse.
+            Value::Null => Ok(None),
+            _ => Err(de::Error::custom("invalid token format")),
+        }
+    }
+}
+
 #[derive(Clone, Debug, Serialize, ToSchema)]
 pub struct Info {
     /// Model info
@@ -50,6 +151,8 @@ pub struct Info {
     pub max_batch_total_tokens: u32,
     #[schema(example = "20")]
     pub max_waiting_tokens: usize,
+    #[schema(nullable = true, example = "null")]
+    pub max_batch_size: Option<usize>,
     #[schema(example = "2")]
     pub validation_workers: usize,
     /// Router Info
@@ -61,7 +164,7 @@ pub struct Info {
     pub docker_label: Option<&'static str>,
 }
 
-#[derive(Clone, Debug, Deserialize, ToSchema)]
+#[derive(Clone, Debug, Deserialize, ToSchema, Default)]
 pub(crate) struct GenerateParameters {
     #[serde(default)]
     #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 1)]
@@ -83,6 +186,14 @@ pub(crate) struct GenerateParameters {
     )]
     pub repetition_penalty: Option<f32>,
     #[serde(default)]
+    #[schema(
+        exclusive_minimum = -2.0,
+        nullable = true,
+        default = "null",
+        example = 0.1
+    )]
+    pub frequency_penalty: Option<f32>,
+    #[serde(default)]
     #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 10)]
     pub top_k: Option<i32>,
     #[serde(default)]
@@ -107,7 +218,7 @@ pub(crate) struct GenerateParameters {
     #[schema(default = "false", example = true)]
     pub do_sample: bool,
     #[serde(default = "default_max_new_tokens")]
-    #[schema(nullable = true, default = "null", example = "20")]
+    #[schema(nullable = true, default = "100", example = "20")]
     pub max_new_tokens: Option<u32>,
     #[serde(default)]
     #[schema(nullable = true, default = "null", example = false)]
@@ -138,10 +249,12 @@ pub(crate) struct GenerateParameters {
     #[serde(default)]
     #[schema(exclusive_minimum = 0, nullable = true, default = "null", example = 5)]
     pub top_n_tokens: Option<u32>,
+    #[serde(default)]
+    pub grammar: Option<GrammarType>,
 }
 
 fn default_max_new_tokens() -> Option<u32> {
-    None
+    Some(100)
 }
 
 fn default_parameters() -> GenerateParameters {
@@ -149,10 +262,11 @@ fn default_parameters() -> GenerateParameters {
         best_of: None,
         temperature: None,
         repetition_penalty: None,
+        frequency_penalty: None,
         top_k: None,
         top_p: None,
         typical_p: None,
-        do_sample: false,
+        do_sample: true,
         max_new_tokens: default_max_new_tokens(),
         return_full_text: None,
         stop: Vec::new(),
@@ -162,9 +276,538 @@ fn default_parameters() -> GenerateParameters {
         decoder_input_details: false,
         seed: None,
         top_n_tokens: None,
+        grammar: None,
     }
 }
 
+#[derive(Clone, Deserialize, Serialize, ToSchema, Debug)]
+pub struct CompletionRequest {
+    /// UNUSED
+    #[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
+    /// ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.
+    pub model: String,
+
+    /// The prompt to generate completions for.
+    #[schema(example = "What is Deep Learning?")]
+    pub prompt: String,
+
+    /// The maximum number of tokens that can be generated in the completion.
+    #[serde(default)]
+    #[schema(default = "32")] // NOTE(review): the serde default is None; confirm where the advertised 32 is applied
+    pub max_tokens: Option<u32>,
+
+    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while
+    /// lower values like 0.2 will make it more focused and deterministic. We generally recommend altering this or `top_p` but not both.
+    #[serde(default)]
+    #[schema(nullable = true, example = 1.0)]
+    pub temperature: Option<f32>,
+
+    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the
+    /// tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+    #[serde(default)]
+    #[schema(nullable = true, example = 0.95)]
+    pub top_p: Option<f32>,
+
+    #[serde(default = "bool::default")]
+    pub stream: bool, // false when the field is omitted (bool::default)
+
+    #[schema(nullable = true, example = 42)]
+    pub seed: Option<u64>,
+
+    /// The text to append to the prompt. This is useful for completing sentences or generating a paragraph of text.
+    /// Please see the completion_template field in the model's tokenizer_config.json file for the completion template.
+    #[serde(default)]
+    pub suffix: Option<String>,
+
+    #[serde(default)]
+    pub repetition_penalty: Option<f32>,
+
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,
+    /// decreasing the model's likelihood to repeat the same line verbatim.
+    #[serde(default)]
+    #[schema(example = "1.0")]
+    pub frequency_penalty: Option<f32>,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema, Default)]
+pub(crate) struct Completion {
+    pub id: String,
+    pub object: String,
+    #[schema(example = "1706270835")]
+    pub created: u64,
+    #[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
+    pub model: String,
+    pub system_fingerprint: String,
+    pub choices: Vec<CompletionComplete>,
+    pub usage: Usage,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
+pub(crate) struct CompletionComplete {
+    pub index: u32,
+    pub text: String,
+    pub logprobs: Option<Vec<f32>>,
+    pub finish_reason: String,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
+pub(crate) struct ChatCompletion {
+    pub id: String,
+    pub object: String,
+    #[schema(example = "1706270835")]
+    pub created: u64,
+    #[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
+    pub model: String,
+    pub system_fingerprint: String,
+    pub choices: Vec<ChatCompletionComplete>,
+    pub usage: Usage,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
+pub(crate) struct ChatCompletionComplete {
+    pub index: u32,
+    pub message: Message,
+    pub logprobs: Option<ChatCompletionLogprobs>,
+    pub finish_reason: String,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
+pub(crate) struct ChatCompletionLogprobs {
+    content: Vec<ChatCompletionLogprob>,
+}
+
+impl From<(Token, Vec<Token>)> for ChatCompletionLogprobs {
+    /// Wraps one token and its top-token candidates into a single-entry `content` list.
+    fn from((token, top_tokens): (Token, Vec<Token>)) -> Self {
+        // Only the text and logprob of each candidate are carried over.
+        let to_top_logprob = |candidate: Token| ChatCompletionTopLogprob {
+            token: candidate.text,
+            logprob: candidate.logprob,
+        };
+        let entry = ChatCompletionLogprob {
+            token: token.text,
+            logprob: token.logprob,
+            top_logprobs: top_tokens.into_iter().map(to_top_logprob).collect(),
+        };
+
+        Self {
+            content: vec![entry],
+        }
+    }
+}
+
+impl From<(Vec<Token>, Vec<Vec<Token>>)> for ChatCompletionLogprobs {
+    /// Pairs each generated token with the top-token list found at the same position.
+    ///
+    /// `top_tokens` may be shorter than `tokens`: positions without a matching list get
+    /// an empty `top_logprobs`. Surplus `top_tokens` entries are ignored.
+    fn from((tokens, top_tokens): (Vec<Token>, Vec<Vec<Token>>)) -> Self {
+        // Consumed one list per token; yields `None` once the lists run out.
+        let mut remaining_top = top_tokens.into_iter();
+        let mut content = Vec::with_capacity(tokens.len());
+
+        for token in tokens {
+            // A missing list degrades to an empty vector instead of ending the loop.
+            let candidates = remaining_top.next().unwrap_or_default();
+            let top_logprobs = candidates
+                .into_iter()
+                .map(|candidate| ChatCompletionTopLogprob {
+                    token: candidate.text,
+                    logprob: candidate.logprob,
+                })
+                .collect();
+
+            // One entry per input token, in the order of `tokens`.
+            content.push(ChatCompletionLogprob {
+                token: token.text,
+                logprob: token.logprob,
+                top_logprobs,
+            });
+        }
+
+        Self { content }
+    }
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
+pub(crate) struct ChatCompletionLogprob {
+    token: String,
+    logprob: f32,
+    top_logprobs: Vec<ChatCompletionTopLogprob>,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
+pub(crate) struct ChatCompletionTopLogprob {
+    token: String,
+    logprob: f32,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema, Default)]
+pub(crate) struct Usage {
+    pub prompt_tokens: u32,
+    pub completion_tokens: u32,
+    pub total_tokens: u32,
+}
+
+impl ChatCompletion {
+    pub(crate) fn new(
+        model: String,
+        system_fingerprint: String,
+        output: Option<String>,
+        created: u64,
+        details: Details,
+        return_logprobs: bool,
+        tool_calls: Option<Vec<ToolCall>>,
+    ) -> Self { // assembles a response holding a single choice at index 0
+        Self {
+            id: String::new(), // left empty; no identifier is generated here
+            object: "text_completion".into(), // NOTE(review): OpenAI chat responses report "chat.completion" — confirm this value is intended
+            created,
+            model,
+            system_fingerprint,
+            choices: vec![ChatCompletionComplete {
+                index: 0,
+                message: Message {
+                    role: "assistant".into(),
+                    content: output,
+                    name: None,
+                    tool_calls,
+                },
+                logprobs: return_logprobs // stays None unless logprobs were requested
+                    .then(|| ChatCompletionLogprobs::from((details.tokens, details.top_tokens))),
+                finish_reason: details.finish_reason.to_string(),
+            }],
+            usage: Usage {
+                prompt_tokens: details.prefill.len() as u32, // counted from the prefill entries
+                completion_tokens: details.generated_tokens,
+                total_tokens: details.prefill.len() as u32 + details.generated_tokens,
+            },
+        }
+    }
+}
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
+pub(crate) struct CompletionCompleteChunk {
+    pub id: String,
+    pub object: String,
+    pub created: u64,
+    pub choices: Vec<CompletionComplete>,
+    pub model: String,
+    pub system_fingerprint: String,
+}
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
+pub(crate) struct ChatCompletionChunk {
+    pub id: String,
+    pub object: String,
+    #[schema(example = "1706270978")]
+    pub created: u64,
+    #[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
+    pub model: String,
+    pub system_fingerprint: String,
+    pub choices: Vec<ChatCompletionChoice>,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema)]
+pub(crate) struct ChatCompletionChoice {
+    pub index: u32,
+    pub delta: ChatCompletionDelta,
+    pub logprobs: Option<ChatCompletionLogprobs>,
+    pub finish_reason: Option<String>,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize, ToSchema)]
+pub(crate) struct ChatCompletionDelta {
+    #[schema(example = "user")]
+    pub role: String,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    #[schema(example = "What is Deep Learning?")]
+    pub content: Option<String>,
+    // default to None
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub tool_calls: Option<DeltaToolCall>,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema, Debug)]
+pub(crate) struct DeltaToolCall {
+    pub index: u32,
+    pub id: String,
+    pub r#type: String,
+    pub function: Function,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema, Debug)]
+pub(crate) struct Function {
+    pub name: Option<String>,
+    pub arguments: String,
+}
+
+#[allow(clippy::too_many_arguments)]
+impl ChatCompletionChunk {
+    pub(crate) fn new(
+        model: String,
+        system_fingerprint: String,
+        delta: Option<String>,
+        tool_calls: Option<Vec<String>>, // only the first entry is forwarded as the call arguments
+        created: u64,
+        logprobs: Option<ChatCompletionLogprobs>,
+        finish_reason: Option<String>,
+    ) -> Self { // assembles a streaming chunk holding a single choice at index 0
+        Self {
+            id: String::new(),
+            object: "text_completion".to_string(),
+            created,
+            model,
+            system_fingerprint,
+            choices: vec![ChatCompletionChoice {
+                index: 0,
+                delta: ChatCompletionDelta {
+                    role: "assistant".to_string(),
+                    content: delta,
+                    tool_calls: tool_calls.map(|tc| DeltaToolCall {
+                        index: 0,
+                        id: String::new(),
+                        r#type: "function".to_string(),
+                        function: Function {
+                            name: None,
+                            arguments: tc.into_iter().next().unwrap_or_default(), // empty list -> "" instead of an index panic
+                        },
+                    }),
+                },
+                logprobs,
+                finish_reason,
+            }],
+        }
+    }
+}
+
+#[derive(Clone, Deserialize, ToSchema, Serialize)]
+pub(crate) struct ChatRequest {
+    #[schema(example = "mistralai/Mistral-7B-Instruct-v0.2")]
+    /// [UNUSED] ID of the model to use. See the model endpoint compatibility table for details on which models work with the Chat API.
+    pub model: String,
+
+    /// A list of messages comprising the conversation so far.
+    #[schema(example = "[{\"role\": \"user\", \"content\": \"What is Deep Learning?\"}]")]
+    pub messages: Vec<Message>,
+
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far,
+    /// decreasing the model's likelihood to repeat the same line verbatim.
+    #[serde(default)]
+    #[schema(example = "1.0")]
+    pub frequency_penalty: Option<f32>,
+
+    /// UNUSED. Note: this field deserializes a list of floats, not the token-to-bias JSON object described below.
+    /// Modify the likelihood of specified tokens appearing in the completion. Accepts a JSON object that maps tokens
+    /// (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically,
+    /// the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model,
+    /// but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should
+    /// result in a ban or exclusive selection of the relevant token.
+    #[serde(default)]
+    pub logit_bias: Option<Vec<f32>>,
+
+    /// Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each
+    /// output token returned in the content of message.
+    #[serde(default)]
+    #[schema(example = "false")]
+    pub logprobs: Option<bool>,
+
+    /// An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with
+    /// an associated log probability. logprobs must be set to true if this parameter is used.
+    #[serde(default)]
+    #[schema(example = "5")]
+    pub top_logprobs: Option<u32>,
+
+    /// The maximum number of tokens that can be generated in the chat completion.
+    #[serde(default)]
+    #[schema(example = "32")]
+    pub max_tokens: Option<u32>,
+
+    /// UNUSED
+    /// How many chat completion choices to generate for each input message. Note that you will be charged based on the
+    /// number of generated tokens across all of the choices. Keep n as 1 to minimize costs.
+    #[serde(default)]
+    #[schema(nullable = true, example = "2")]
+    pub n: Option<u32>,
+
+    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far,
+    /// increasing the model's likelihood to talk about new topics
+    #[serde(default)]
+    #[schema(nullable = true, example = 0.1)]
+    pub presence_penalty: Option<f32>,
+
+    /// Up to 4 sequences where the API will stop generating further tokens.
+    #[serde(default)]
+    #[schema(nullable = true, example = "null")]
+    pub stop: Option<Vec<String>>,
+
+    #[serde(default = "bool::default")]
+    pub stream: bool, // false when the field is omitted (bool::default)
+
+    #[schema(nullable = true, example = 42)]
+    pub seed: Option<u64>,
+
+    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while
+    /// lower values like 0.2 will make it more focused and deterministic.
+    ///
+    /// We generally recommend altering this or `top_p` but not both.
+    #[serde(default)]
+    #[schema(nullable = true, example = 1.0)]
+    pub temperature: Option<f32>,
+
+    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the
+    /// tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+    #[serde(default)]
+    #[schema(nullable = true, example = 0.95)]
+    pub top_p: Option<f32>,
+
+    /// A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of
+    /// functions the model may generate JSON inputs for.
+    #[serde(default)]
+    #[schema(nullable = true, example = "null")]
+    pub tools: Option<Vec<Tool>>,
+
+    /// A prompt to be inserted before the tools
+    #[serde(default = "default_tool_prompt")]
+    #[schema(
+        nullable = true,
+        example = "\"Based on the conversation, please choose the most appropriate tool to use: \""
+    )]
+    pub tool_prompt: Option<String>,
+
+    /// A specific tool to use. If not provided, the model will default to use any of the tools provided in the tools parameter.
+    #[serde(default)]
+    #[schema(nullable = true, example = "null")]
+    #[serde(deserialize_with = "deserialize_tool_choice::deserialize")] // an omitted field falls back to None via serde(default)
+    pub tool_choice: Option<ToolType>,
+}
+
+fn default_tool_prompt() -> Option<String> {
+    // Serde default for `ChatRequest::tool_prompt`; the text starts with a newline.
+    let prompt = "\nBased on the conversation, please choose the most appropriate tool to use: ";
+    Some(String::from(prompt))
+}
+#[derive(Clone, Deserialize, ToSchema, Serialize)]
+enum ToolType {
+    FunctionName(String), // a tool selected by its function name
+    OneOf, // what "auto" and an explicit JSON null deserialize to for tool_choice
+}
+
+/// Deserializes `tool_choice`: "none" -> `None`, "auto" or null -> `OneOf`, any other string or `{"function": {"name": ..}}` -> `FunctionName`.
+mod deserialize_tool_choice {
+    use super::*;
+    use serde::de;
+    use serde::Deserializer;
+    use serde_json::Value;
+
+    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<ToolType>, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let value = Value::deserialize(deserializer)?; // buffer as generic JSON so the shape can be inspected
+
+        match value {
+            Value::String(s) => match s.as_str() {
+                "none" => Ok(None),
+                "auto" => Ok(Some(ToolType::OneOf)),
+                _ => Ok(Some(ToolType::FunctionName(s))),
+            },
+            Value::Object(map) => {
+                if let Some(content) = map
+                    .get("function")
+                    .and_then(|v| v.get("name"))
+                    .and_then(|v| v.as_str())
+                {
+                    Ok(Some(ToolType::FunctionName(content.to_string())))
+                } else {
+                    Err(de::Error::custom("function key not found in tool choice"))
+                }
+            }
+            Value::Null => Ok(Some(ToolType::OneOf)),
+            _ => Err(de::Error::custom("invalid tool choice format")), // numbers, booleans and arrays are rejected
+        }
+    }
+}
+
+#[derive(Debug, Deserialize, Serialize, ToSchema)]
+pub struct Tools {
+    #[serde(flatten)]
+    functions_map: FunctionsMap,
+    properties: Properties,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct FunctionsMap {
+    #[serde(rename = "$functions")]
+    functions: std::collections::HashMap<String, serde_json::Value>,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct FunctionRef {
+    #[serde(rename = "$ref")]
+    ref_path: String,
+}
+
+#[derive(Debug, Serialize, Deserialize)]
+struct Properties {
+    #[serde(serialize_with = "serialize_function")]
+    function: Vec<FunctionRef>,
+}
+
+fn serialize_function<S>(functions: &Vec<FunctionRef>, serializer: S) -> Result<S::Ok, S::Error>
+where
+    S: serde::Serializer, // hooked in through `serialize_with` on `Properties::function`
+{
+    use serde::ser::SerializeStruct; // brings `serialize_field` / `end` into scope
+    let mut state = serializer.serialize_struct("Function", 1)?; // a struct with exactly one field
+    state.serialize_field("anyOf", functions)?; // the whole list is written under the "anyOf" key
+    state.end()
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize, ToSchema, Default)]
+pub(crate) struct FunctionDefinition {
+    #[serde(default)]
+    pub description: Option<String>,
+    pub name: String,
+    pub parameters: serde_json::Value,
+}
+
+#[derive(Clone, Debug, Deserialize, Serialize, ToSchema)]
+pub(crate) struct Tool {
+    // The type of the tool. Currently, only 'function' is supported.
+    #[schema(example = "function")]
+    pub r#type: String,
+    // The function this tool exposes: its name, optional description and JSON parameters.
+    pub function: FunctionDefinition,
+}
+
+#[derive(Clone, Serialize, Deserialize)]
+pub(crate) struct ChatTemplateInputs<'a> {
+    messages: Vec<Message>,
+    bos_token: Option<&'a str>,
+    eos_token: Option<&'a str>,
+    add_generation_prompt: bool,
+}
+
+#[derive(Clone, Deserialize, Serialize, ToSchema, Default, Debug)]
+pub(crate) struct ToolCall {
+    pub id: u32,
+    pub r#type: String,
+    pub function: FunctionDefinition,
+}
+
+#[derive(Clone, Deserialize, ToSchema, Serialize)]
+pub(crate) struct Message {
+    #[schema(example = "user")]
+    pub role: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[schema(example = "My name is David and I")]
+    pub content: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    #[schema(example = "\"David\"")]
+    pub name: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub tool_calls: Option<Vec<ToolCall>>,
+}
+
 #[derive(Clone, Debug, Deserialize, ToSchema)]
 pub(crate) struct GenerateRequest {
     #[schema(example = "My name is Olivier and I")]
@@ -203,7 +846,7 @@ pub struct PrefillToken {
     logprob: f32,
 }
 
-#[derive(Debug, Serialize, ToSchema)]
+#[derive(Debug, Serialize, ToSchema, Clone)]
 pub struct Token {
     #[schema(example = 0)]
     id: u32,
@@ -215,8 +858,21 @@ pub struct Token {
     special: bool,
 }
 
+#[derive(Debug, Serialize, ToSchema)]
+pub struct SimpleToken {
+    #[schema(example = 0)]
+    id: u32,
+    #[schema(example = "test")]
+    text: String,
+    #[schema(example = 0)]
+    start: usize,
+    #[schema(example = 2)]
+    stop: usize,
+}
+
 #[derive(Serialize, ToSchema)]
 #[serde(rename_all(serialize = "snake_case"))]
+#[schema(example = "Length")]
 pub(crate) enum FinishReason {
     #[schema(rename = "length")]
     Length,
@@ -227,6 +883,16 @@ pub(crate) enum FinishReason {
     StopSequence,
 }
 
+impl std::fmt::Display for FinishReason {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            FinishReason::Length => "length",
+            FinishReason::EndOfSequenceToken => "eos_token",
+            FinishReason::StopSequence => "stop_sequence",
+        })
+    }
+}
+
 #[derive(Serialize, ToSchema)]
 pub(crate) struct BestOfSequence {
     #[schema(example = "test")]
@@ -267,6 +933,10 @@ pub(crate) struct GenerateResponse {
     pub details: Option<Details>,
 }
 
+#[derive(Serialize, ToSchema)]
+#[serde(transparent)]
+pub(crate) struct TokenizeResponse(Vec<SimpleToken>);
+
 #[derive(Serialize, ToSchema)]
 pub(crate) struct StreamDetails {
     #[schema(example = "length")]
@@ -279,6 +949,7 @@ pub(crate) struct StreamDetails {
 
 #[derive(Serialize, ToSchema)]
 pub(crate) struct StreamResponse {
+    pub index: u32,
     pub token: Token,
     #[serde(skip_serializing_if = "Vec::is_empty")]
     pub top_tokens: Vec<Token>,
@@ -296,26 +967,73 @@ pub(crate) struct ErrorResponse {
 
 #[cfg(test)]
 mod tests {
-    use std::io::Write;
+    use super::*;
+
     use tokenizers::Tokenizer;
 
     pub(crate) async fn get_tokenizer() -> Tokenizer {
-        let filename = std::path::Path::new("tokenizer.json");
-        if !filename.exists() {
-            let content = reqwest::get("https://huggingface.co/gpt2/raw/main/tokenizer.json")
-                .await
-                .unwrap()
-                .bytes()
-                .await
-                .unwrap();
-            let tmp_filename = "tokenizer.json.temp";
-            let mut file = std::fs::File::create(tmp_filename).unwrap();
-            file.write_all(&content).unwrap();
-            // Re-check if another process has written this file maybe.
-            if !filename.exists() {
-                std::fs::rename(tmp_filename, filename).unwrap()
+        let api = hf_hub::api::sync::Api::new().unwrap();
+        let repo = api.model("gpt2".to_string());
+        let filename = repo.get("tokenizer.json").unwrap();
+        Tokenizer::from_file(filename).unwrap()
+    }
+
+    #[test]
+    fn test_hub_nested_tokens_tokenizer_config() {
+        // this is a subset of the tokenizer_config.json file
+        // in this case we expect the tokens to be encoded as simple strings
+        let json_content = r#"{
+            "chat_template": "test",
+            "bos_token": "<｜begin▁of▁sentence｜>",
+            "eos_token": "<｜end▁of▁sentence｜>"
+        }"#;
+
+        let config: HubTokenizerConfig = serde_json::from_str(json_content).unwrap();
+
+        // check that we successfully parsed the tokens
+        assert_eq!(
+            config.chat_template,
+            Some(ChatTemplateVersions::Single("test".to_string()))
+        );
+        assert_eq!(
+            config.bos_token,
+            Some("<｜begin▁of▁sentence｜>".to_string())
+        );
+        assert_eq!(config.eos_token, Some("<｜end▁of▁sentence｜>".to_string()));
+
+        // in this case we expect the tokens to be encoded as structured tokens
+        // we want the content of the structured token
+        let json_content = r#"{
+            "chat_template": "test",
+            "bos_token": {
+              "__type": "AddedToken",
+              "content": "<｜begin▁of▁sentence｜>",
+              "lstrip": false,
+              "normalized": true,
+              "rstrip": false,
+              "single_word": false
+            },
+            "eos_token": {
+              "__type": "AddedToken",
+              "content": "<｜end▁of▁sentence｜>",
+              "lstrip": false,
+              "normalized": true,
+              "rstrip": false,
+              "single_word": false
             }
-        }
-        Tokenizer::from_file("tokenizer.json").unwrap()
+        }"#;
+
+        let config: HubTokenizerConfig = serde_json::from_str(json_content).unwrap();
+
+        // check that we successfully parsed the tokens
+        assert_eq!(
+            config.chat_template,
+            Some(ChatTemplateVersions::Single("test".to_string()))
+        );
+        assert_eq!(
+            config.bos_token,
+            Some("<｜begin▁of▁sentence｜>".to_string())
+        );
+        assert_eq!(config.eos_token, Some("<｜end▁of▁sentence｜>".to_string()));
     }
 }
diff --git a/router/src/main.rs b/router/src/main.rs
index dd1ef7e2..a224dd4a 100644
--- a/router/src/main.rs
+++ b/router/src/main.rs
@@ -1,8 +1,9 @@
 /// Copyright (C) 2024 Habana Labs, Ltd. an Intel Company.
 
-/// Text Generation Inference webserver entrypoint
 use axum::http::HeaderValue;
 use clap::Parser;
+use hf_hub::api::tokio::{Api, ApiBuilder, ApiRepo};
+use hf_hub::{Repo, RepoType};
 use opentelemetry::sdk::propagation::TraceContextPropagator;
 use opentelemetry::sdk::trace;
 use opentelemetry::sdk::trace::Sampler;
@@ -10,13 +11,15 @@ use opentelemetry::sdk::Resource;
 use opentelemetry::{global, KeyValue};
 use opentelemetry_otlp::WithExportConfig;
 use std::env;
+use std::fs::File;
+use std::io::BufReader;
 use std::net::{IpAddr, Ipv4Addr, SocketAddr};
 use std::path::Path;
-use std::time::Duration;
 use text_generation_client::{ClientError, ShardedClient};
-use text_generation_router::{server, HubModelInfo};
+use text_generation_router::config::Config;
+use text_generation_router::{server, HubModelInfo, HubTokenizerConfig};
 use thiserror::Error;
-use tokenizers::{FromPretrainedParameters, Tokenizer};
+use tokenizers::Tokenizer;
 use tower_http::cors::AllowOrigin;
 use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::util::SubscriberInitExt;
@@ -35,7 +38,7 @@ struct Args {
     #[clap(default_value = "5", long, env)]
     max_top_n_tokens: u32,
     #[clap(default_value = "1024", long, env)]
-    max_input_length: usize,
+    max_input_tokens: usize,
     #[clap(default_value = "2048", long, env)]
     max_total_tokens: usize,
     #[clap(default_value = "1.2", long, env)]
@@ -46,6 +49,8 @@ struct Args {
     max_batch_total_tokens: Option<u32>,
     #[clap(default_value = "20", long, env)]
     max_waiting_tokens: usize,
+    #[clap(long, env)]
+    max_batch_size: Option<usize>,
     #[clap(default_value = "0.0.0.0", long, env)]
     hostname: String,
     #[clap(default_value = "3000", long, short, env)]
@@ -55,6 +60,8 @@ struct Args {
     #[clap(default_value = "bigscience/bloom", long, env)]
     tokenizer_name: String,
     #[clap(long, env)]
+    tokenizer_config_path: Option<String>,
+    #[clap(long, env)]
     revision: Option<String>,
     #[clap(default_value = "2", long, env)]
     validation_workers: usize,
@@ -70,9 +77,14 @@ struct Args {
     ngrok_authtoken: Option<String>,
     #[clap(long, env)]
     ngrok_edge: Option<String>,
+    #[clap(long, env, default_value_t = false)]
+    messages_api_enabled: bool,
+    #[clap(long, env, default_value_t = false)]
+    disable_grammar_support: bool,
 }
 
-fn main() -> Result<(), RouterError> {
+#[tokio::main]
+async fn main() -> Result<(), RouterError> {
     // Get args
     let args = Args::parse();
     // Pattern match configuration
@@ -81,16 +93,18 @@ fn main() -> Result<(), RouterError> {
         max_best_of,
         max_stop_sequences,
         max_top_n_tokens,
-        max_input_length,
+        max_input_tokens,
         max_total_tokens,
         waiting_served_ratio,
         max_batch_prefill_tokens,
         max_batch_total_tokens,
         max_waiting_tokens,
+        max_batch_size,
         hostname,
         port,
         master_shard_uds_path,
         tokenizer_name,
+        tokenizer_config_path,
         revision,
         validation_workers,
         json_output,
@@ -99,16 +113,21 @@ fn main() -> Result<(), RouterError> {
         ngrok,
         ngrok_authtoken,
         ngrok_edge,
+        messages_api_enabled,
+        disable_grammar_support,
     } = args;
 
+    // Launch Tokio runtime
+    init_logging(otlp_endpoint, json_output);
+
     // Validate args
-    if max_input_length >= max_total_tokens {
+    if max_input_tokens >= max_total_tokens {
         return Err(RouterError::ArgumentValidation(
-            "`max_input_length` must be < `max_total_tokens`".to_string(),
+            "`max_input_tokens` must be < `max_total_tokens`".to_string(),
         ));
     }
-    if max_input_length as u32 > max_batch_prefill_tokens {
-        return Err(RouterError::ArgumentValidation(format!("`max_batch_prefill_tokens` must be >= `max_input_length`. Given: {max_batch_prefill_tokens} and {max_input_length}")));
+    if max_input_tokens as u32 > max_batch_prefill_tokens {
+        return Err(RouterError::ArgumentValidation(format!("`max_batch_prefill_tokens` must be >= `max_input_tokens`. Given: {max_batch_prefill_tokens} and {max_input_tokens}")));
     }
 
     if validation_workers == 0 {
@@ -126,6 +145,25 @@ fn main() -> Result<(), RouterError> {
         }
     }
 
+    let (max_batch_size, max_batch_total_tokens) = match (max_batch_size, max_batch_total_tokens) {
+        (Some(_max_batch_size), Some(_max_batch_total_tokens)) => {
+            if (_max_batch_total_tokens as usize / max_total_tokens) != _max_batch_size {
+                tracing::warn!("max_batch_size was set to {_max_batch_size} while max_batch_total_tokens to {_max_batch_total_tokens}");
+                tracing::warn!("These values are not match, so max_batch_size will be preferred");
+                (Some(_max_batch_size), Some((_max_batch_size * max_total_tokens) as u32))
+            } else {
+                (Some(_max_batch_size), Some(_max_batch_total_tokens))
+            }
+        },
+        (Some(_max_batch_size), None) => (
+            Some(_max_batch_size), Some((_max_batch_size * max_total_tokens) as u32)
+        ),
+        (None, Some(_max_batch_total_tokens)) => (
+            Some(_max_batch_total_tokens as usize / max_total_tokens), Some(_max_batch_total_tokens)
+        ),
+        (None, None) => (None, None),
+    };
+
     // CORS allowed origins
     // map to go inside the option and then map to parse from String to HeaderValue
     // Finally, convert to AllowOrigin
@@ -144,165 +182,264 @@ fn main() -> Result<(), RouterError> {
     // This will only be used to validate payloads
     let local_path = Path::new(&tokenizer_name);
     let local_model = local_path.exists() && local_path.is_dir();
+
+    // Shared API builder initialization
+    let api_builder = || {
+        let mut builder = ApiBuilder::new()
+            .with_progress(false)
+            .with_token(authorization_token);
+
+        if let Ok(cache_dir) = std::env::var("HUGGINGFACE_HUB_CACHE") {
+            builder = builder.with_cache_dir(cache_dir.into());
+        }
+
+        builder
+    };
+
+    // Decide if we need to use the API based on the revision and local path
+    let use_api = revision.is_some() || !local_path.exists() || !local_path.is_dir();
+
+    // Initialize API if needed
+    let api = if use_api {
+        tracing::info!("Using the Hugging Face API");
+        match api_builder().build() {
+            Ok(api) => Some(api),
+            Err(_) => {
+                tracing::warn!("Unable to build the Hugging Face API");
+                None
+            }
+        }
+    } else {
+        None
+    };
+
+    // Load tokenizer and model info
     let skip_tokenizer_in_tgi = env::var("SKIP_TOKENIZER_IN_TGI")
         .ok()
         .map_or(false, |value| value.to_lowercase() == "true");
-    let tokenizer = if skip_tokenizer_in_tgi {
-        None
-    } else if local_model {
-        // Load local tokenizer
-        Tokenizer::from_file(local_path.join("tokenizer.json")).ok()
-    } else {
-        // Download and instantiate tokenizer
-        // We need to download it outside of the Tokio runtime
-        let params = FromPretrainedParameters {
-            revision: revision.clone().unwrap_or("main".to_string()),
-            auth_token: authorization_token.clone(),
-            ..Default::default()
+    let (tokenizer, model_info, config) = if local_model {
+        let tokenizer = if skip_tokenizer_in_tgi {
+            None
+        } else {
+            Tokenizer::from_file(local_path.join("tokenizer.json")).ok()
         };
-        Tokenizer::from_pretrained(tokenizer_name.clone(), Some(params)).ok()
+        let model_info = HubModelInfo {
+            model_id: tokenizer_name.to_string(),
+            sha: None,
+            pipeline_tag: None,
+        };
+        let config: Option<Config> = std::fs::read_to_string(local_path.join("config.json"))
+            .ok()
+            .as_ref()
+            .and_then(|c| serde_json::from_str(c).ok());
+
+        (tokenizer, model_info, config)
+    } else if let Some(api) = api.clone() {
+        let api_repo = api.repo(Repo::with_revision(
+            tokenizer_name.to_string(),
+            RepoType::Model,
+            revision.clone().unwrap_or_else(|| "main".to_string()),
+        ));
+
+        let tokenizer = match api_repo.get("tokenizer.json").await {
+            Ok(tokenizer_filename) => Tokenizer::from_file(tokenizer_filename).ok(),
+            Err(_) => get_base_tokenizer(&api, &api_repo).await,
+        };
+
+        let config: Option<Config> = api_repo.get("config.json").await.ok().and_then(|filename| {
+            std::fs::read_to_string(filename)
+                .ok()
+                .as_ref()
+                .and_then(|c| {
+                    let config: Result<Config, _> = serde_json::from_str(c);
+                    if let Err(err) = &config {
+                        tracing::warn!("Could not parse config {err:?}");
+                    }
+                    config.ok()
+                })
+        });
+
+        let model_info = get_model_info(&api_repo).await.unwrap_or_else(|| {
+            tracing::warn!("Could not retrieve model info from the Hugging Face hub.");
+            HubModelInfo {
+                model_id: tokenizer_name.to_string(),
+                sha: None,
+                pipeline_tag: None,
+            }
+        });
+
+        (tokenizer, model_info, config)
+    } else {
+        // No API and no local model
+        return Err(RouterError::ArgumentValidation(
+            "No local model found and no revision specified".to_string(),
+        ));
     };
 
-    // Launch Tokio runtime
-    tokio::runtime::Builder::new_multi_thread()
-        .enable_all()
-        .build()?
-        .block_on(async {
-            init_logging(otlp_endpoint, json_output);
+    tracing::info!("Using config {config:?}");
 
-            if skip_tokenizer_in_tgi {
-                tracing::warn!("Rust input length validation disabled by environment variable");
-            } else if tokenizer.is_none() {
-                tracing::warn!(
-                    "Could not find a fast tokenizer implementation for {tokenizer_name}"
+    // Load tokenizer config if found locally, or check if we can get it from the API if needed
+    let tokenizer_config = if let Some(path) = tokenizer_config_path {
+        tracing::info!("Using local tokenizer config from user specified path");
+        HubTokenizerConfig::from_file(&std::path::PathBuf::from(path))
+    } else if local_model {
+        tracing::info!("Using local tokenizer config");
+        HubTokenizerConfig::from_file(&local_path.join("tokenizer_config.json"))
+    } else {
+        match api {
+            Some(api) => {
+                tracing::info!("Using the Hugging Face API to retrieve tokenizer config");
+                let repo = Repo::with_revision(
+                    tokenizer_name.to_string(),
+                    RepoType::Model,
+                    revision.unwrap_or("main".to_string()),
                 );
-                tracing::warn!("Rust input length validation and truncation is disabled");
-            }
-
-            // Get Model info
-            let model_info = match local_model {
-                true => HubModelInfo {
-                    model_id: tokenizer_name.clone(),
-                    sha: None,
-                    pipeline_tag: None,
-                },
-                false => get_model_info(&tokenizer_name, revision, authorization_token)
+                get_tokenizer_config(&api.repo(repo))
                     .await
                     .unwrap_or_else(|| {
-                        tracing::warn!("Could not retrieve model info from the Hugging Face hub.");
-                        HubModelInfo {
-                            model_id: tokenizer_name.to_string(),
-                            sha: None,
-                            pipeline_tag: None,
-                        }
-                    }),
-            };
-
-            // if pipeline-tag == text-generation we default to return_full_text = true
-            let compat_return_full_text = match &model_info.pipeline_tag {
-                None => {
-                    tracing::warn!("no pipeline tag found for model {tokenizer_name}");
-                    false
-                }
-                Some(pipeline_tag) => pipeline_tag.as_str() == "text-generation",
-            };
-
-            // Instantiate sharded client from the master unix socket
-            let mut sharded_client = ShardedClient::connect_uds(master_shard_uds_path)
-                .await
-                .map_err(RouterError::Connection)?;
-            // Clear the cache; useful if the webserver rebooted
-            sharded_client
-                .clear_cache(None)
-                .await
-                .map_err(RouterError::Cache)?;
-            // Get info from the shard
-            let shard_info = sharded_client.info().await.map_err(RouterError::Info)?;
-
-            // Warmup model
-            tracing::info!("Warming up model");
-            let max_supported_batch_total_tokens = match sharded_client
-                .warmup(max_input_length as u32, max_batch_prefill_tokens, max_total_tokens as u32, max_batch_total_tokens)
-                .await
-                .map_err(RouterError::Warmup)?
-            {
-                // Older models do not support automatic max-batch-total-tokens
-                None => {
-                    let max_batch_total_tokens = max_batch_total_tokens.unwrap_or(
-                        16000.max((max_total_tokens as u32).max(max_batch_prefill_tokens)),
-                    );
-                    tracing::warn!("Model does not support automatic max batch total tokens");
-                    max_batch_total_tokens
-                }
-                // Flash attention models return their max supported total tokens
-                Some(max_supported_batch_total_tokens) => {
-                    // Warn if user added his own max-batch-total-tokens as we will ignore it
-                    if max_batch_total_tokens.is_some() {
                         tracing::warn!(
-                            "`--max-batch-total-tokens` is deprecated for Flash \
+                            "Could not retrieve tokenizer config from the Hugging Face hub."
+                        );
+                        HubTokenizerConfig::default()
+                    })
+            }
+            None => {
+                tracing::warn!("Could not find tokenizer config locally and no API specified");
+                HubTokenizerConfig::default()
+            }
+        }
+    };
+
+    if tokenizer.is_none() {
+        tracing::warn!("Could not find a fast tokenizer implementation for {tokenizer_name}");
+        tracing::warn!("Rust input length validation and truncation is disabled");
+    }
+
+    // if pipeline-tag == text-generation we default to return_full_text = true
+    let compat_return_full_text = match &model_info.pipeline_tag {
+        None => {
+            tracing::warn!("no pipeline tag found for model {tokenizer_name}");
+            true
+        }
+        Some(pipeline_tag) => pipeline_tag.as_str() == "text-generation",
+    };
+
+    // Instantiate sharded client from the master unix socket
+    let mut sharded_client = ShardedClient::connect_uds(master_shard_uds_path)
+        .await
+        .map_err(RouterError::Connection)?;
+    // Clear the cache; useful if the webserver rebooted
+    sharded_client
+        .clear_cache(None)
+        .await
+        .map_err(RouterError::Cache)?;
+    // Get info from the shard
+    let shard_info = sharded_client.info().await.map_err(RouterError::Info)?;
+
+    // Warmup model
+    tracing::info!("Warming up model");
+    let max_supported_batch_total_tokens = match sharded_client
+        .warmup(
+            max_input_tokens as u32,
+            max_batch_prefill_tokens,
+            max_total_tokens as u32,
+            max_batch_size,
+        )
+        .await
+        .map_err(RouterError::Warmup)?
+    {
+        // Older models do not support automatic max-batch-total-tokens
+        None => {
+            let max_batch_total_tokens = max_batch_total_tokens
+                .unwrap_or(16000.max((max_total_tokens as u32).max(max_batch_prefill_tokens)));
+            tracing::warn!("Model does not support automatic max batch total tokens");
+            max_batch_total_tokens
+        }
+        // Flash attention models return their max supported total tokens
+        Some(max_supported_batch_total_tokens) => {
+            // Warn if user added his own max-batch-total-tokens as we will ignore it
+            if max_batch_total_tokens.is_some() {
+                tracing::warn!(
+                    "`--max-batch-total-tokens` is deprecated for Flash \
                         Attention models."
-                        );
-                        tracing::warn!(
-                            "Inferred max batch total tokens: {max_supported_batch_total_tokens}"
-                        );
-                    }
-                    if max_total_tokens as u32 > max_supported_batch_total_tokens {
-                        return Err(RouterError::ArgumentValidation(format!("`max_total_tokens` must be <= `max_batch_total_tokens`. Given: {max_total_tokens} and {max_supported_batch_total_tokens}")));
-                    }
+                );
+                tracing::warn!(
+                    "Inferred max batch total tokens: {max_supported_batch_total_tokens}"
+                );
+            }
+            if max_total_tokens as u32 > max_supported_batch_total_tokens {
+                return Err(RouterError::ArgumentValidation(format!("`max_total_tokens` must be <= `max_batch_total_tokens`. Given: {max_total_tokens} and {max_supported_batch_total_tokens}")));
+            }
 
-                    max_supported_batch_total_tokens
-                }
-            };
-            tracing::info!("Setting max batch total tokens to {max_supported_batch_total_tokens}");
-            tracing::info!("Connected");
+            max_supported_batch_total_tokens
+        }
+    };
+    tracing::info!("Setting max batch total tokens to {max_supported_batch_total_tokens}");
+    tracing::info!("Connected");
 
-            let addr = match hostname.parse() {
-                Ok(ip) => SocketAddr::new(ip, port),
-                Err(_) => {
-                    tracing::warn!("Invalid hostname, defaulting to 0.0.0.0");
-                    SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), port)
-                }
-            };
+    // Determine the server port based on the feature and environment variable.
+    let port = if cfg!(feature = "google") {
+        std::env::var("AIP_HTTP_PORT")
+            .map(|aip_http_port| aip_http_port.parse::<u16>().unwrap_or(port))
+            .unwrap_or(port)
+    } else {
+        port
+    };
 
-            // Run server
-            server::run(
-                model_info,
-                shard_info,
-                compat_return_full_text,
-                max_concurrent_requests,
-                max_best_of,
-                max_stop_sequences,
-                max_top_n_tokens,
-                max_input_length,
-                max_total_tokens,
-                waiting_served_ratio,
-                max_batch_prefill_tokens,
-                max_supported_batch_total_tokens,
-                max_waiting_tokens,
-                sharded_client,
-                tokenizer,
-                validation_workers,
-                addr,
-                cors_allow_origin,
-                ngrok,
-                ngrok_authtoken,
-                ngrok_edge,
-            )
-                .await?;
-            Ok(())
-        })
+    let addr = match hostname.parse() {
+        Ok(ip) => SocketAddr::new(ip, port),
+        Err(_) => {
+            tracing::warn!("Invalid hostname, defaulting to 0.0.0.0");
+            SocketAddr::new(IpAddr::V4(Ipv4Addr::new(0, 0, 0, 0)), port)
+        }
+    };
+
+    // Run server
+    server::run(
+        model_info,
+        shard_info,
+        compat_return_full_text,
+        max_concurrent_requests,
+        max_best_of,
+        max_stop_sequences,
+        max_top_n_tokens,
+        max_input_tokens,
+        max_total_tokens,
+        waiting_served_ratio,
+        max_batch_prefill_tokens,
+        max_supported_batch_total_tokens,
+        max_waiting_tokens,
+        max_batch_size,
+        sharded_client,
+        tokenizer,
+        config,
+        validation_workers,
+        addr,
+        cors_allow_origin,
+        ngrok,
+        ngrok_authtoken,
+        ngrok_edge,
+        tokenizer_config,
+        messages_api_enabled,
+        disable_grammar_support,
+    )
+    .await?;
+    Ok(())
 }
 
 /// Init logging using env variables LOG_LEVEL and LOG_FORMAT:
 ///     - otlp_endpoint is an optional URL to an Open Telemetry collector
 ///     - LOG_LEVEL may be TRACE, DEBUG, INFO, WARN or ERROR (default to INFO)
 ///     - LOG_FORMAT may be TEXT or JSON (default to TEXT)
+///     - LOG_COLORIZE controls ANSI colors: they stay enabled unless it is set to exactly "1" (see the `ansi` flag below)
 fn init_logging(otlp_endpoint: Option<String>, json_output: bool) {
     let mut layers = Vec::new();
 
     // STDOUT/STDERR layer
+    let ansi = std::env::var("LOG_COLORIZE") != Ok("1".to_string());
     let fmt_layer = tracing_subscriber::fmt::layer()
         .with_file(true)
+        .with_ansi(ansi)
         .with_line_number(true);
 
     let fmt_layer = match json_output {
@@ -349,30 +486,8 @@ fn init_logging(otlp_endpoint: Option<String>, json_output: bool) {
 }
 
 /// get model info from the Huggingface Hub
-pub async fn get_model_info(
-    model_id: &str,
-    revision: Option<String>,
-    token: Option<String>,
-) -> Option<HubModelInfo> {
-    let revision = match revision {
-        None => {
-            tracing::warn!("`--revision` is not set");
-            tracing::warn!("We strongly advise to set it to a known supported commit.");
-            "main".to_string()
-        }
-        Some(revision) => revision,
-    };
-
-    let client = reqwest::Client::new();
-    // Poor man's urlencode
-    let revision = revision.replace('/', "%2F");
-    let url = format!("https://huggingface.co/api/models/{model_id}/revision/{revision}");
-    let mut builder = client.get(url).timeout(Duration::from_secs(5));
-    if let Some(token) = token {
-        builder = builder.bearer_auth(token);
-    }
-
-    let response = builder.send().await.ok()?;
+pub async fn get_model_info(api: &ApiRepo) -> Option<HubModelInfo> {
+    let response = api.info_request().send().await.ok()?;
 
     if response.status().is_success() {
         let hub_model_info: HubModelInfo =
@@ -389,6 +504,50 @@ pub async fn get_model_info(
     }
 }
 
+/// Load the tokenizer of the base model named by `base_model_name_or_path` in this repo's `config.json`; yields `None` if any step fails or the key is absent.
+pub async fn get_base_tokenizer(api: &Api, api_repo: &ApiRepo) -> Option<Tokenizer> {
+    let config_filename = api_repo.get("config.json").await.ok()?;
+
+    // Open the file in read-only mode with buffer.
+    let file = File::open(config_filename).ok()?;
+    let reader = BufReader::new(file);
+
+    // Read the JSON contents of the file as an untyped `serde_json::Value`.
+    let config: serde_json::Value = serde_json::from_reader(reader).ok()?;
+
+    if let Some(serde_json::Value::String(base_model_id)) = config.get("base_model_name_or_path") {
+        let api_base_repo = api.repo(Repo::with_revision(
+            base_model_id.to_string(),
+            RepoType::Model,
+            "main".to_string(), // the base repo is always read at revision `main`
+        ));
+
+        let tokenizer_filename = api_base_repo.get("tokenizer.json").await.ok()?;
+        Tokenizer::from_file(tokenizer_filename).ok()
+    } else {
+        None
+    }
+}
+
+/// Get `tokenizer_config.json` through `api_repo` and parse it as `HubTokenizerConfig`; `None` if the file cannot be fetched, opened or parsed.
+pub async fn get_tokenizer_config(api_repo: &ApiRepo) -> Option<HubTokenizerConfig> {
+    let tokenizer_config_filename = api_repo.get("tokenizer_config.json").await.ok()?;
+
+    // Open the file in read-only mode with buffer.
+    let file = File::open(tokenizer_config_filename).ok()?;
+    let reader = BufReader::new(file);
+
+    // Read the JSON contents of the file as an instance of 'HubTokenizerConfig'.
+    let tokenizer_config: HubTokenizerConfig = serde_json::from_reader(reader)
+        .map_err(|e| {
+            tracing::warn!("Unable to parse tokenizer config: {}", e);
+            e // hand the error back unchanged; `.ok()?` below turns it into `None`
+        })
+        .ok()?;
+
+    Some(tokenizer_config)
+}
+
 #[derive(Debug, Error)]
 enum RouterError {
     #[error("Argument validation error: {0}")]
diff --git a/router/src/queue.rs b/router/src/queue.rs
index 6734c6a6..11690bf7 100644
--- a/router/src/queue.rs
+++ b/router/src/queue.rs
@@ -44,7 +44,8 @@ impl Queue {
         max_input_length: u32,
         max_total_tokens: u32,
         block_size: u32,
-        window_size: Option<u32>
+        window_size: Option<u32>,
+        speculate: u32,
     ) -> Self {
         // Create channel
         let (queue_sender, queue_receiver) = mpsc::unbounded_channel();
@@ -56,6 +57,7 @@ impl Queue {
             max_total_tokens,
             block_size,
             window_size,
+            speculate,
             queue_receiver,
         ));
 
@@ -77,6 +79,7 @@ impl Queue {
     pub(crate) async fn next_batch(
         &self,
         min_size: Option<usize>,
+        max_size: Option<usize>,
         prefill_token_budget: u32,
         token_budget: u32,
     ) -> Option<NextBatch> {
@@ -87,6 +90,7 @@ impl Queue {
         self.queue_sender
             .send(QueueCommand::NextBatch {
                 min_size,
+                max_size,
                 prefill_token_budget,
                 token_budget,
                 response_sender,
@@ -106,6 +110,7 @@ async fn queue_task(
     max_total_tokens: u32,
     block_size: u32,
     window_size: Option<u32>,
+    speculate: u32,
     mut receiver: mpsc::UnboundedReceiver<QueueCommand>,
 ) {
     let mut state = State::new(
@@ -113,7 +118,8 @@ async fn queue_task(
         max_input_length,
         max_total_tokens,
         block_size,
-        window_size
+        window_size,
+        speculate
     );
 
     while let Some(cmd) = receiver.recv().await {
@@ -124,12 +130,14 @@ async fn queue_task(
             }
             QueueCommand::NextBatch {
                 min_size,
+                max_size,
                 prefill_token_budget,
                 token_budget,
                 response_sender,
                 span,
             } => span.in_scope(|| {
-                let next_batch = state.next_batch(min_size, prefill_token_budget, token_budget);
+                let next_batch =
+                    state.next_batch(min_size, max_size, prefill_token_budget, token_budget);
                 response_sender.send(next_batch).unwrap();
                 metrics::gauge!("tgi_queue_size", state.entries.len() as f64);
             }),
@@ -256,6 +264,9 @@ struct State {
 
     /// Sliding window
     window_size: Option<u32>,
+
+    /// Speculation amount
+    speculate: u32,
 }
 
 impl State {
@@ -265,6 +276,7 @@ impl State {
         max_total_tokens: u32,
         block_size: u32,
         window_size: Option<u32>,
+        speculate: u32,
     ) -> Self {
         let default_threshold: u64 = 120;
         let threshold: u64 = match env::var("QUEUE_THRESHOLD_MS") {
@@ -281,6 +293,7 @@ impl State {
             max_total_tokens,
             block_size,
             window_size,
+            speculate,
         }
     }
 
@@ -299,16 +312,19 @@ impl State {
     fn next_batch(
         &mut self,
         min_size: Option<usize>,
+        max_size: Option<usize>,
         prefill_token_budget: u32,
         token_budget: u32,
     ) -> Option<NextBatch> {
         if self.entries.is_empty() {
+            tracing::debug!("No queue");
             return None;
         }
 
         // Check if we have enough entries
         if let Some(min_size) = min_size {
             if self.entries.len() < min_size {
+                tracing::debug!("Not enough entries");
                 return None;
             }
         }
@@ -332,6 +348,7 @@ impl State {
             // was dropped by the client)
             if entry.response_tx.is_closed() {
                 metrics::increment_counter!("tgi_request_failure", "err" => "dropped");
+                tracing::debug!("Dropping entry");
                 continue;
             }
 
@@ -365,14 +382,16 @@ impl State {
             }
 
             if prefill_tokens > prefill_token_budget
-                || (prefill_tokens + decode_tokens) > token_budget
+                || (prefill_tokens + decode_tokens + self.speculate) > token_budget
             {
                 // Entry is over budget
                 // Add it back to the front
+                tracing::debug!("Over budget: prefill_tokens={prefill_tokens} > {prefill_token_budget} || {prefill_tokens} + {decode_tokens} + {} > {token_budget}", self.speculate);
                 self.entries.push(IdentifiableEntry(id, entry));
                 break;
             }
 
+            tracing::debug!("Accepting entry");
             // Create a new span to link the batch back to this entry
             let entry_batch_span = info_span!(parent: &entry.span, "infer");
             // Add relationships
@@ -394,10 +413,16 @@ impl State {
             entry.batch_time = Some(Instant::now());
             // Insert in batch_entries IntMap
             batch_entries.insert(id, entry);
+
+            // Check if max_size
+            if Some(batch_requests.len()) == max_size {
+                break;
+            }
         }
 
         // Empty batch
         if batch_requests.is_empty() {
+            tracing::debug!("Filterered out all entries");
             return None;
         }
 
@@ -442,6 +467,7 @@ enum QueueCommand {
     Append(Box<Entry>, Span),
     NextBatch {
         min_size: Option<usize>,
+        max_size: Option<usize>,
         prefill_token_budget: u32,
         token_budget: u32,
         response_sender: oneshot::Sender<Option<NextBatch>>,
@@ -452,18 +478,20 @@ enum QueueCommand {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use text_generation_client::{NextTokenChooserParameters, StoppingCriteriaParameters};
+    use text_generation_client::{
+        GrammarType as ProtoGrammarType, NextTokenChooserParameters, StoppingCriteriaParameters,
+    };
     use tracing::info_span;
 
     fn default_queue() -> Queue {
         Queue::new(
-            true, 1, 2, 1, None
+            true, 1, 2, 1, None, 0
         )
     }
 
     fn default_state() -> State {
         State::new(
-            true, 1, 2, 1, None
+            true, 1, 2, 1, None, 0
         )
     }
 
@@ -475,7 +503,7 @@ mod tests {
 
         let entry = Entry {
             request: ValidGenerateRequest {
-                inputs: "".to_string(),
+                inputs: String::new(),
                 input_length: 0,
                 truncate: 0,
                 decoder_input_details: false,
@@ -487,7 +515,10 @@ mod tests {
                     do_sample: false,
                     seed: 0,
                     repetition_penalty: 0.0,
+                    frequency_penalty: 0.0,
                     watermark: false,
+                    grammar: String::new(),
+                    grammar_type: ProtoGrammarType::None as i32,
                 },
                 stopping_parameters: StoppingCriteriaParameters {
                     ignore_eos_token: false,
@@ -525,8 +556,8 @@ mod tests {
     fn test_next_batch_empty() {
         let mut state = default_state();
 
-        assert!(state.next_batch(None, 1, 1).is_none());
-        assert!(state.next_batch(Some(1), 1, 1).is_none());
+        assert!(state.next_batch(None, None, 1, 1).is_none());
+        assert!(state.next_batch(Some(1), None, 1, 1).is_none());
     }
 
     #[test]
@@ -537,7 +568,7 @@ mod tests {
         state.append(entry1);
         state.append(entry2);
 
-        let (entries, batch, _) = state.next_batch(None, 2, 4).unwrap();
+        let (entries, batch, _) = state.next_batch(None, None, 2, 4).unwrap();
         assert_eq!(entries.len(), 2);
         assert!(entries.contains_key(&0));
         assert!(entries.contains_key(&1));
@@ -553,7 +584,7 @@ mod tests {
         let (entry3, _guard3) = default_entry();
         state.append(entry3);
 
-        assert!(state.next_batch(Some(2), 2, 2).is_none());
+        assert!(state.next_batch(Some(2), None, 2, 2).is_none());
 
         assert_eq!(state.next_id, 3);
         assert_eq!(state.entries.len(), 1);
@@ -561,6 +592,26 @@ mod tests {
         assert_eq!(id, 2);
     }
 
+    #[test]
+    fn test_next_batch_max_size() {
+        let mut state = default_state();
+        let (entry1, _guard1) = default_entry();
+        let (entry2, _guard2) = default_entry();
+        state.append(entry1);
+        state.append(entry2);
+
+        let (entries, batch, _) = state.next_batch(None, Some(1), 2, 2).unwrap(); // max_size = Some(1)
+        assert_eq!(entries.len(), 1);
+        assert!(entries.contains_key(&0));
+        assert!(entries.get(&0).unwrap().batch_time.is_some());
+        assert_eq!(batch.id, 0);
+        assert_eq!(batch.size, 1);
+
+        assert_eq!(state.next_id, 2);
+        assert_eq!(state.entries.len(), 1); // the second entry is still queued
+        assert_eq!(state.next_batch_id, 1);
+    }
+
     #[test]
     fn test_next_batch_token_budget() {
         let mut state = default_state();
@@ -569,7 +620,7 @@ mod tests {
         state.append(entry1);
         state.append(entry2);
 
-        let (entries, batch, _) = state.next_batch(None, 1, 2).unwrap();
+        let (entries, batch, _) = state.next_batch(None, None, 1, 2).unwrap();
         assert_eq!(entries.len(), 1);
         assert!(entries.contains_key(&0));
         assert_eq!(batch.id, 0);
@@ -582,7 +633,7 @@ mod tests {
         let (entry3, _guard3) = default_entry();
         state.append(entry3);
 
-        let (entries, batch, _) = state.next_batch(None, 3, 6).unwrap();
+        let (entries, batch, _) = state.next_batch(None, None, 3, 6).unwrap();
         assert_eq!(entries.len(), 2);
         assert!(entries.contains_key(&1));
         assert!(entries.contains_key(&2));
@@ -605,8 +656,8 @@ mod tests {
     async fn test_queue_next_batch_empty() {
         let queue = default_queue();
 
-        assert!(queue.next_batch(None, 1, 1).await.is_none());
-        assert!(queue.next_batch(Some(1), 1, 1).await.is_none());
+        assert!(queue.next_batch(None, None, 1, 1).await.is_none());
+        assert!(queue.next_batch(Some(1), None, 1, 1).await.is_none());
     }
 
     #[tokio::test]
@@ -617,7 +668,7 @@ mod tests {
         queue.append(entry1);
         queue.append(entry2);
 
-        let (entries, batch, _) = queue.next_batch(None, 2, 4).await.unwrap();
+        let (entries, batch, _) = queue.next_batch(None, None, 2, 4).await.unwrap();
         assert_eq!(entries.len(), 2);
         assert!(entries.contains_key(&0));
         assert!(entries.contains_key(&1));
@@ -630,11 +681,11 @@ mod tests {
         queue.append(entry3);
 
         // Not enough requests pending
-        assert!(queue.next_batch(Some(2), 2, 2).await.is_none());
+        assert!(queue.next_batch(Some(2), None, 2, 2).await.is_none());
         // Not enough token budget
-        assert!(queue.next_batch(Some(1), 0, 0).await.is_none());
+        assert!(queue.next_batch(Some(1), None, 0, 0).await.is_none());
         // Ok
-        let (entries2, batch2, _) = queue.next_batch(Some(1), 1, 2).await.unwrap();
+        let (entries2, batch2, _) = queue.next_batch(Some(1), None, 2, 4).await.unwrap();
         assert_eq!(entries2.len(), 1);
         assert!(entries2.contains_key(&2));
         assert!(entries2.get(&2).unwrap().batch_time.is_some());
@@ -642,6 +693,22 @@ mod tests {
         assert_eq!(batch2.size, 1);
     }
 
+    #[tokio::test]
+    async fn test_queue_next_batch_max_size() {
+        let queue = default_queue();
+        let (entry1, _guard1) = default_entry();
+        let (entry2, _guard2) = default_entry();
+        queue.append(entry1);
+        queue.append(entry2);
+
+        let (entries, batch, _) = queue.next_batch(None, Some(1), 2, 2).await.unwrap(); // max_size = Some(1)
+        assert_eq!(entries.len(), 1); // only one of the two appended entries is batched
+        assert!(entries.contains_key(&0));
+        assert!(entries.get(&0).unwrap().batch_time.is_some());
+        assert_eq!(batch.id, 0);
+        assert_eq!(batch.size, 1);
+    }
+
     #[tokio::test]
     async fn test_queue_next_batch_token_budget() {
         let queue = default_queue();
@@ -650,7 +717,7 @@ mod tests {
         queue.append(entry1);
         queue.append(entry2);
 
-        let (entries, batch, _) = queue.next_batch(None, 1, 2).await.unwrap();
+        let (entries, batch, _) = queue.next_batch(None, None, 1, 2).await.unwrap();
         assert_eq!(entries.len(), 1);
         assert!(entries.contains_key(&0));
         assert_eq!(batch.id, 0);
@@ -659,7 +726,7 @@ mod tests {
         let (entry3, _guard3) = default_entry();
         queue.append(entry3);
 
-        let (entries, batch, _) = queue.next_batch(None, 2, 4).await.unwrap();
+        let (entries, batch, _) = queue.next_batch(None, None, 3, 6).await.unwrap();
         assert_eq!(entries.len(), 2);
         assert!(entries.contains_key(&1));
         assert!(entries.contains_key(&2));
@@ -667,12 +734,31 @@ mod tests {
         assert_eq!(batch.size, 2);
     }
 
+    #[tokio::test]
+    async fn test_queue_next_batch_token_speculate() {
+        let queue = Queue::new(true, 1, 2, 1, None, 2);
+        let (first, _first_guard) = default_entry();
+        let (second, _second_guard) = default_entry();
+        queue.append(first);
+        queue.append(second);
+
+        // A budget of 1 must not produce a batch.
+        let starved = queue.next_batch(None, None, 1, 1).await;
+        assert!(starved.is_none());
+
+        // With a budget of 6 both queued entries come back together.
+        let (picked, batch, _) = queue.next_batch(None, None, 6, 6).await.unwrap();
+        assert_eq!(picked.len(), 2);
+        assert!((0..2).all(|id| picked.contains_key(&id)));
+        assert_eq!((batch.id, batch.size), (0, 2));
+    }
+
     #[tokio::test]
     async fn test_queue_next_batch_dropped_receiver() {
         let queue = default_queue();
         let (entry, _) = default_entry();
         queue.append(entry);
 
-        assert!(queue.next_batch(None, 1, 1).await.is_none());
+        assert!(queue.next_batch(None, None, 1, 1).await.is_none());
     }
 }
diff --git a/router/src/server.rs b/router/src/server.rs
index c2eab874..6f0b0fa9 100644
--- a/router/src/server.rs
+++ b/router/src/server.rs
@@ -1,14 +1,23 @@
 /// Copyright (C) 2024 Habana Labs, Ltd. an Intel Company.
 
+use crate::config::Config;
 /// HTTP Server logic
 use crate::health::Health;
 use crate::infer::{InferError, InferResponse, InferStreamResponse};
 use crate::validation::ValidationError;
 use crate::{
-    BestOfSequence, CompatGenerateRequest, Details, ErrorResponse, FinishReason,
-    GenerateParameters, GenerateRequest, GenerateResponse, HubModelInfo, Infer, Info, PrefillToken,
-    StreamDetails, StreamResponse, Token, Validation,
+    BestOfSequence, Details, ErrorResponse, FinishReason, GenerateParameters, GenerateRequest,
+    GenerateResponse, GrammarType, HubModelInfo, HubTokenizerConfig, Infer, Info, Message,
+    PrefillToken, SimpleToken, StreamDetails, StreamResponse, Token, TokenizeResponse, Usage,
+    Validation,
 };
+use crate::{
+    ChatCompletion, ChatCompletionChoice, ChatCompletionChunk, ChatCompletionComplete,
+    ChatCompletionDelta, ChatCompletionLogprob, ChatCompletionLogprobs, ChatCompletionTopLogprob,
+    ChatRequest, CompatGenerateRequest, Completion, CompletionComplete, CompletionCompleteChunk,
+    CompletionRequest, DeltaToolCall, Function, Tool, VertexRequest, VertexResponse,
+};
+use crate::{FunctionDefinition, FunctionRef, FunctionsMap, Properties, ToolCall, ToolType, Tools};
 use axum::extract::Extension;
 use axum::http::{HeaderMap, Method, StatusCode};
 use axum::response::sse::{Event, KeepAlive, Sse};
@@ -16,9 +25,13 @@ use axum::response::{IntoResponse, Response};
 use axum::routing::{get, post};
 use axum::{http, Json, Router};
 use axum_tracing_opentelemetry::middleware::OtelAxumLayer;
+use futures::stream::FuturesUnordered;
 use futures::stream::StreamExt;
 use futures::Stream;
+use futures::TryStreamExt;
 use metrics_exporter_prometheus::{Matcher, PrometheusBuilder, PrometheusHandle};
+use serde_json::Value;
+use std::collections::HashMap;
 use std::convert::Infallible;
 use std::net::SocketAddr;
 use std::sync::atomic::AtomicBool;
@@ -58,6 +71,7 @@ example = json ! ({"error": "Incomplete generation"})),
 async fn compat_generate(
     Extension(default_return_full_text): Extension<bool>,
     infer: Extension<Infer>,
+    compute_type: Extension<ComputeType>,
     Json(mut req): Json<CompatGenerateRequest>,
 ) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
     // default return_full_text given the pipeline_tag
@@ -67,11 +81,11 @@ async fn compat_generate(
 
     // switch on stream
     if req.stream {
-        Ok(generate_stream(infer, Json(req.into()))
+        Ok(generate_stream(infer, compute_type, Json(req.into()))
             .await
             .into_response())
     } else {
-        let (headers, Json(generation)) = generate(infer, Json(req.into())).await?;
+        let (headers, Json(generation)) = generate(infer, compute_type, Json(req.into())).await?;
         // wrap generation inside a Vec to match api-inference
         Ok((headers, Json(vec![generation])).into_response())
     }
@@ -146,13 +160,15 @@ seed,
 )]
 async fn generate(
     infer: Extension<Infer>,
+    Extension(ComputeType(compute_type)): Extension<ComputeType>,
     Json(req): Json<GenerateRequest>,
 ) -> Result<(HeaderMap, Json<GenerateResponse>), (StatusCode, Json<ErrorResponse>)> {
     let span = tracing::Span::current();
     let start_time = Instant::now();
     metrics::increment_counter!("tgi_request_count");
 
-    tracing::debug!("Input: {}", req.inputs);
+    // Do not log ultra long inputs, like image payloads; cut on chars so UTF-8 cannot panic.
+    tracing::debug!("Input: {}", req.inputs.chars().take(1000).collect::<String>());
 
     let compute_characters = req.inputs.chars().count();
     let mut add_prompt = None;
@@ -172,6 +188,7 @@ async fn generate(
     };
 
     // Token details
+    let input_length = response._input_length;
     let details = match details {
         true => {
             // convert best_of_responses
@@ -230,10 +247,10 @@ async fn generate(
 
     // Headers
     let mut headers = HeaderMap::new();
-    headers.insert("x-compute-type", "gpu+optimized".parse().unwrap());
+    headers.insert("x-compute-type", compute_type.parse().unwrap());
     headers.insert(
         "x-compute-time",
-        total_time.as_millis().to_string().parse().unwrap(),
+        total_time.as_secs_f64().to_string().parse().unwrap(),
     );
     headers.insert(
         "x-compute-characters",
@@ -259,6 +276,11 @@ async fn generate(
         "x-time-per-token",
         time_per_token.as_millis().to_string().parse().unwrap(),
     );
+    headers.insert("x-prompt-tokens", input_length.into());
+    headers.insert(
+        "x-generated-tokens",
+        response.generated_text.generated_tokens.into(),
+    );
 
     // Metrics
     metrics::increment_counter!("tgi_request_success");
@@ -334,11 +356,28 @@ seed,
 )]
 async fn generate_stream(
     Extension(infer): Extension<Infer>,
+    Extension(compute_type): Extension<ComputeType>,
     Json(req): Json<GenerateRequest>,
 ) -> (
     HeaderMap,
     Sse<impl Stream<Item = Result<Event, Infallible>>>,
 ) {
+    let on_message_callback = |stream_token: StreamResponse| {
+        let event = Event::default();
+        event.json_data(stream_token).unwrap()
+    };
+    let (headers, response_stream) =
+        generate_stream_internal(infer, compute_type, Json(req), on_message_callback).await;
+    let sse = Sse::new(response_stream).keep_alive(KeepAlive::default());
+    (headers, sse)
+}
+
+async fn generate_stream_internal(
+    infer: Infer,
+    ComputeType(compute_type): ComputeType,
+    Json(req): Json<GenerateRequest>,
+    on_message_callback: impl Fn(StreamResponse) -> Event,
+) -> (HeaderMap, impl Stream<Item = Result<Event, Infallible>>) {
     let span = tracing::Span::current();
     let start_time = Instant::now();
     metrics::increment_counter!("tgi_request_count");
@@ -348,7 +387,7 @@ async fn generate_stream(
     let compute_characters = req.inputs.chars().count();
 
     let mut headers = HeaderMap::new();
-    headers.insert("x-compute-type", "gpu+optimized".parse().unwrap());
+    headers.insert("x-compute-type", compute_type.parse().unwrap());
     headers.insert(
         "x-compute-characters",
         compute_characters.to_string().parse().unwrap(),
@@ -380,9 +419,11 @@ async fn generate_stream(
         } else {
             match infer.generate_stream(req).instrument(info_span!(parent: &span, "async_stream")).await {
                 // Keep permit as long as generate_stream lives
-                Ok((_permit, mut response_stream)) => {
+                Ok((_permit, _input_length, mut response_stream)) => {
+                    let mut index = 0;
                     // Server-Sent Event stream
                     while let Some(response) = response_stream.next().await {
+                        index += 1;
                         match response {
                             Ok(response) => {
                                 match response {
@@ -397,13 +438,14 @@ async fn generate_stream(
 
                                         // StreamResponse
                                         let stream_token = StreamResponse {
+                                            index,
                                             token,
                                             top_tokens,
                                             generated_text: None,
                                             details: None,
                                         };
-
-                                        yield Ok(Event::default().json_data(stream_token).unwrap())
+                                        let event = on_message_callback(stream_token);
+                                        yield Ok(event);
                                     }
                                     // Yield event for last token and compute timings
                                     InferStreamResponse::End {
@@ -459,13 +501,16 @@ async fn generate_stream(
                                         tracing::info!(parent: &span, "Success");
 
                                         let stream_token = StreamResponse {
+                                            index,
                                             token,
                                             top_tokens,
                                             generated_text: Some(output_text),
                                             details
                                         };
 
-                                        yield Ok(Event::default().json_data(stream_token).unwrap());
+
+                                        let event = on_message_callback(stream_token);
+                                        yield Ok(event);
                                         break;
                                     }
                                 }
@@ -496,7 +541,588 @@ async fn generate_stream(
         }
     };
 
-    (headers, Sse::new(stream).keep_alive(KeepAlive::default()))
+    (headers, stream)
+}
+
+/// Generate tokens
+#[utoipa::path(
+    post,
+    tag = "Text Generation Inference",
+    path = "/v1/completions",
+    request_body = CompletionRequest,
+    responses(
+    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
+    (status = 424, description = "Generation Error", body = ErrorResponse,
+    example = json ! ({"error": "Request failed during generation"})),
+    (status = 429, description = "Model is overloaded", body = ErrorResponse,
+    example = json ! ({"error": "Model is overloaded"})),
+    (status = 422, description = "Input validation error", body = ErrorResponse,
+    example = json ! ({"error": "Input validation error"})),
+    (status = 500, description = "Incomplete generation", body = ErrorResponse,
+    example = json ! ({"error": "Incomplete generation"})),
+    )
+    )]
+#[instrument(
+    skip_all,
+    fields(
+    // parameters = ? req.parameters,
+    total_time,
+    validation_time,
+    queue_time,
+    inference_time,
+    time_per_token,
+    seed,
+    )
+    )]
+async fn completions(
+    Extension(infer): Extension<Infer>,
+    Extension(compute_type): Extension<ComputeType>,
+    Extension(info): Extension<Info>,
+    Json(req): Json<CompletionRequest>,
+) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
+    metrics::increment_counter!("tgi_request_count");
+
+    let stream = req.stream;
+    let max_new_tokens = req.max_tokens.or(Some(100)); // default to 100 new tokens when unset
+    let seed = req.seed;
+
+    // `suffix` is not supported: reject the request with a 422 instead of ignoring it
+    if req.suffix.is_some() {
+        metrics::increment_counter!("tgi_request_failure", "err" => "validation");
+        return Err((
+            StatusCode::UNPROCESSABLE_ENTITY,
+            Json(ErrorResponse {
+                error: "Suffix is not supported and can be achieved by preprocessing the prompt."
+                    .to_string(),
+                error_type: "suffix not supported".to_string(),
+            }),
+        ));
+    }
+
+    // translate the completion request into the native generate request
+    let generate_request = GenerateRequest {
+        inputs: req.prompt.to_string(),
+        parameters: GenerateParameters {
+            best_of: None,
+            temperature: req.temperature,
+            repetition_penalty: req.repetition_penalty,
+            frequency_penalty: req.frequency_penalty,
+            top_k: None,
+            top_p: req.top_p,
+            typical_p: None,
+            do_sample: true,
+            max_new_tokens,
+            return_full_text: None,
+            stop: Vec::new(),
+            truncate: None,
+            watermark: false,
+            details: true,
+            decoder_input_details: !stream, // prefill feeds `usage` in the non-stream reply
+            seed,
+            top_n_tokens: None,
+            grammar: None,
+        },
+    };
+
+    if stream {
+        let on_message_callback = move |stream_token: StreamResponse| {
+            let event = Event::default();
+
+            let current_time = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap_or_else(|_| std::time::Duration::from_secs(0))
+                .as_secs();
+
+            event
+                .json_data(CompletionCompleteChunk {
+                    id: "".to_string(),
+                    object: "text_completion".to_string(),
+                    created: current_time,
+
+                    choices: vec![CompletionComplete {
+                        finish_reason: "".to_string(),
+                        index: 0,
+                        logprobs: None,
+                        text: stream_token.token.text,
+                    }],
+
+                    model: info.model_id.clone(),
+                    system_fingerprint: format!(
+                        "{}-{}",
+                        info.version,
+                        info.docker_label.unwrap_or("native")
+                    ),
+                })
+                .map_or_else(
+                    |e| {
+                        tracing::error!("Failed to serialize CompletionCompleteChunk: {:?}", e);
+                        Event::default()
+                    },
+                    |data| data,
+                )
+        };
+
+        let (headers, response_stream) = generate_stream_internal(
+            infer,
+            compute_type,
+            Json(generate_request),
+            on_message_callback,
+        )
+        .await;
+
+        let sse = Sse::new(response_stream).keep_alive(KeepAlive::default());
+        Ok((headers, sse).into_response())
+    } else {
+        let (headers, Json(generation)) = generate(
+            Extension(infer),
+            Extension(compute_type),
+            Json(generate_request),
+        )
+        .await?;
+
+        let current_time = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_else(|_| std::time::Duration::from_secs(0))
+            .as_secs();
+
+        let details = generation.details.ok_or((
+            // this should never happen but handle if details are missing unexpectedly
+            StatusCode::INTERNAL_SERVER_ERROR,
+            Json(ErrorResponse {
+                error: "No details in generation".to_string(),
+                error_type: "no details".to_string(),
+            }),
+        ))?;
+
+        let response = Completion {
+            id: "".to_string(),
+            object: "text_completion".to_string(),
+            created: current_time,
+            model: info.model_id.clone(),
+            system_fingerprint: format!(
+                "{}-{}",
+                info.version,
+                info.docker_label.unwrap_or("native")
+            ),
+            choices: vec![CompletionComplete {
+                finish_reason: details.finish_reason.to_string(),
+                index: 0,
+                logprobs: None,
+                text: generation.generated_text,
+            }],
+            usage: Usage {
+                prompt_tokens: details.prefill.len() as u32,
+                completion_tokens: details.generated_tokens,
+                total_tokens: details.prefill.len() as u32 + details.generated_tokens,
+            },
+        };
+
+        Ok((headers, Json(response)).into_response())
+    }
+}
+
+/// Generate tokens
+#[utoipa::path(
+    post,
+    tag = "Text Generation Inference",
+    path = "/v1/chat/completions",
+    request_body = ChatRequest,
+    responses(
+    (status = 200, description = "Generated Text", body = ChatCompletionChunk),
+    (status = 424, description = "Generation Error", body = ErrorResponse,
+    example = json ! ({"error": "Request failed during generation"})),
+    (status = 429, description = "Model is overloaded", body = ErrorResponse,
+    example = json ! ({"error": "Model is overloaded"})),
+    (status = 422, description = "Input validation error", body = ErrorResponse,
+    example = json ! ({"error": "Input validation error"})),
+    (status = 500, description = "Incomplete generation", body = ErrorResponse,
+    example = json ! ({"error": "Incomplete generation"})),
+    )
+    )]
+#[instrument(
+    skip_all,
+    fields(
+    // parameters = ? req.parameters,
+    total_time,
+    validation_time,
+    queue_time,
+    inference_time,
+    time_per_token,
+    seed,
+    )
+    )]
+async fn chat_completions(
+    Extension(infer): Extension<Infer>,
+    Extension(compute_type): Extension<ComputeType>,
+    Extension(info): Extension<Info>,
+    Json(req): Json<ChatRequest>,
+) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
+    metrics::increment_counter!("tgi_request_count");
+
+    let stream = req.stream;
+    let max_new_tokens = req.max_tokens.or(Some(100)); // default to 100 new tokens when unset
+    let repetition_penalty = req
+        .presence_penalty
+        // shift presence_penalty from (-2.0, 2.0) onto a repetition_penalty in (0.0, 4.0)
+        .map(|x| x + 2.0);
+    let logprobs = req.logprobs.unwrap_or(false);
+    let seed = req.seed;
+    let stop = req.stop.unwrap_or_default();
+
+    // apply chat template to flatten the request into a single input
+    let mut inputs = match infer.apply_chat_template(req.messages) {
+        Ok(inputs) => inputs,
+        Err(err) => {
+            metrics::increment_counter!("tgi_request_failure", "err" => "validation");
+            tracing::error!("{err}");
+            return Err((
+                StatusCode::UNPROCESSABLE_ENTITY,
+                Json(ErrorResponse {
+                    error: err.to_string(),
+                    error_type: err.error_type().to_string(),
+                }),
+            ));
+        }
+    };
+
+    let tool_grammar = if let Some((req_tools, tool_choice)) = req.tools.zip(req.tool_choice) {
+        let tool_prompt = req.tool_prompt.unwrap_or_default();
+        let tools_to_use = match tool_choice {
+            ToolType::FunctionName(name) => {
+                vec![req_tools
+                    .iter()
+                    .find(|tool| tool.function.name == *name)
+                    .ok_or_else(|| {
+                        (
+                            StatusCode::UNPROCESSABLE_ENTITY,
+                            Json(ErrorResponse {
+                                error: "Tool choice not found in tool names".to_string(),
+                                error_type: "Tool not found".to_string(),
+                            }),
+                        )
+                    })?
+                    .clone()]
+            }
+            ToolType::OneOf => req_tools.to_owned(),
+        };
+
+        let functions: HashMap<String, Value> = tools_to_use
+            .iter()
+            .map(|tool| {
+                let func = tool.function.clone();
+                (func.name, func.parameters)
+            })
+            .collect();
+
+        let tools = Tools {
+            functions_map: FunctionsMap { functions },
+            properties: Properties {
+                function: tools_to_use
+                    .iter()
+                    .map(|tool| FunctionRef {
+                        ref_path: format!("#/$functions/{}", tool.function.name.clone()),
+                    })
+                    .collect(),
+            },
+        };
+
+        let tools_str = serde_json::to_string(&tools).map_err(|e| {
+            (
+                StatusCode::UNPROCESSABLE_ENTITY,
+                Json(ErrorResponse {
+                    error: e.to_string(),
+                    error_type: "Input validation error".to_string(),
+                }),
+            )
+        })?;
+        inputs = format!("{inputs}{tool_prompt}{tools_str}");
+        Some(GrammarType::Json(serde_json::json!(tools)))
+    } else {
+        None
+    };
+
+    // build the request passing some parameters
+    let generate_request = GenerateRequest {
+        inputs: inputs.to_string(),
+        parameters: GenerateParameters {
+            best_of: None,
+            temperature: req.temperature,
+            repetition_penalty,
+            frequency_penalty: req.frequency_penalty,
+            top_k: None,
+            top_p: req.top_p,
+            typical_p: None,
+            do_sample: true,
+            max_new_tokens,
+            return_full_text: None,
+            stop,
+            truncate: None,
+            watermark: false,
+            details: true,
+            decoder_input_details: !stream,
+            seed,
+            top_n_tokens: req.top_logprobs,
+            grammar: tool_grammar.clone(),
+        },
+    };
+
+    // static values that will be returned in all cases
+    let model_id = info.model_id.clone();
+    let system_fingerprint = format!("{}-{}", info.version, info.docker_label.unwrap_or("native"));
+
+    // switch on stream
+    if stream {
+        // pass this callback to the stream generation and build the required event structure
+        let on_message_callback = move |stream_token: StreamResponse| {
+            let event = Event::default();
+
+            let current_time = std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap_or_else(|_| std::time::Duration::from_secs(0))
+                .as_secs();
+
+            let logprobs = logprobs.then(|| {
+                ChatCompletionLogprobs::from((stream_token.token.clone(), stream_token.top_tokens))
+            });
+
+            // replace the content with the tool calls if grammar is present
+            let (content, tool_calls) = if tool_grammar.is_some() {
+                (None, Some(vec![stream_token.token.text]))
+            } else {
+                (Some(stream_token.token.text), None)
+            };
+
+            event
+                .json_data(ChatCompletionChunk::new(
+                    model_id.clone(),
+                    system_fingerprint.clone(),
+                    content,
+                    tool_calls,
+                    current_time,
+                    logprobs,
+                    stream_token.details.map(|d| d.finish_reason.to_string()),
+                ))
+                .map_or_else(
+                    |e| {
+                        tracing::error!("Failed to serialize ChatCompletionChunk: {:?}", e);
+                        Event::default()
+                    },
+                    |data| data,
+                )
+        };
+
+        let (headers, response_stream) = generate_stream_internal(
+            infer,
+            compute_type,
+            Json(generate_request),
+            on_message_callback,
+        )
+        .await;
+        let sse = Sse::new(response_stream).keep_alive(KeepAlive::default());
+        Ok((headers, sse).into_response())
+    } else {
+        let (headers, Json(generation)) = generate(
+            Extension(infer),
+            Extension(compute_type),
+            Json(generate_request),
+        )
+        .await?;
+
+        let current_time = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or_else(|_| std::time::Duration::from_secs(0))
+            .as_secs();
+
+        let (tool_calls, output) = if tool_grammar.is_some() {
+            // with a grammar in place the generated text is expected to be valid JSON
+            let gen_text_value: Value =
+                serde_json::from_str(&generation.generated_text).map_err(|e| {
+                    (
+                        StatusCode::UNPROCESSABLE_ENTITY,
+                        Json(ErrorResponse {
+                            error: e.to_string(),
+                            error_type: "Input validation error".to_string(),
+                        }),
+                    )
+                })?;
+
+            let tool_calls = vec![ToolCall {
+                id: 0,
+                r#type: "function".to_string(),
+                function: FunctionDefinition {
+                    description: None,
+                    name: "tools".to_string(),
+                    parameters: gen_text_value.get("function").map_or_else(
+                        || {
+                            serde_json::from_str(&generation.generated_text).map_err(|e| {
+                                (
+                                    StatusCode::UNPROCESSABLE_ENTITY,
+                                    Json(ErrorResponse {
+                                        error: e.to_string(),
+                                        error_type: "Input validation error".to_string(),
+                                    }),
+                                )
+                            })
+                        },
+                        |f| Ok(f.clone()),
+                    )?,
+                },
+            }];
+            (Some(tool_calls), None)
+        } else {
+            (None, Some(generation.generated_text))
+        };
+        // build the complete response object with the full text
+        let response = ChatCompletion::new(
+            model_id,
+            system_fingerprint,
+            output,
+            current_time,
+            generation.details.unwrap(),
+            logprobs,
+            tool_calls,
+        );
+
+        // return the chat completion as a single JSON object alongside the compute headers
+        Ok((headers, Json(response)).into_response())
+    }
+}
+
+/// Generate tokens from Vertex request
+#[utoipa::path(
+    post,
+    tag = "Text Generation Inference",
+    path = "/vertex",
+    request_body = VertexRequest,
+    responses(
+    (status = 200, description = "Generated Text", body = VertexResponse),
+    (status = 424, description = "Generation Error", body = ErrorResponse,
+    example = json ! ({"error": "Request failed during generation"})),
+    (status = 429, description = "Model is overloaded", body = ErrorResponse,
+    example = json ! ({"error": "Model is overloaded"})),
+    (status = 422, description = "Input validation error", body = ErrorResponse,
+    example = json ! ({"error": "Input validation error"})),
+    (status = 500, description = "Incomplete generation", body = ErrorResponse,
+    example = json ! ({"error": "Incomplete generation"})),
+    )
+    )]
+#[instrument(
+    skip_all,
+    fields(
+        total_time,
+        validation_time,
+        queue_time,
+        inference_time,
+        time_per_token,
+        seed,
+    )
+)]
+async fn vertex_compatibility(
+    Extension(infer): Extension<Infer>,
+    Extension(compute_type): Extension<ComputeType>,
+    Json(req): Json<VertexRequest>,
+) -> Result<Response, (StatusCode, Json<ErrorResponse>)> {
+    metrics::increment_counter!("tgi_request_count");
+
+    // check that there's at least one instance
+    if req.instances.is_empty() {
+        return Err((
+            StatusCode::UNPROCESSABLE_ENTITY,
+            Json(ErrorResponse {
+                error: "Input validation error".to_string(),
+                error_type: "Input validation error".to_string(),
+            }),
+        ));
+    }
+
+    // Run all instances concurrently; FuturesOrdered keeps predictions in instance order
+    let predictions = req
+        .instances
+        .iter()
+        .map(|instance| {
+            let generate_request = GenerateRequest {
+                inputs: instance.inputs.clone(),
+                parameters: GenerateParameters {
+                    do_sample: true,
+                    max_new_tokens: instance.parameters.as_ref().and_then(|p| p.max_new_tokens),
+                    seed: instance.parameters.as_ref().and_then(|p| p.seed),
+                    details: true,
+                    decoder_input_details: true,
+                    ..Default::default()
+                },
+            };
+
+            async {
+                generate(
+                    Extension(infer.clone()),
+                    Extension(compute_type.clone()),
+                    Json(generate_request),
+                )
+                .await
+                .map(|(_, Json(generation))| generation.generated_text)
+                .map_err(|_| {
+                    (
+                        StatusCode::INTERNAL_SERVER_ERROR,
+                        Json(ErrorResponse {
+                            error: "Incomplete generation".into(),
+                            error_type: "Incomplete generation".into(),
+                        }),
+                    )
+                })
+            }
+        })
+        .collect::<futures::stream::FuturesOrdered<_>>()
+        .try_collect::<Vec<_>>()
+        .await?;
+
+    let response = VertexResponse { predictions };
+    Ok((HeaderMap::new(), Json(response)).into_response())
+}
+
+/// Tokenize inputs
+#[utoipa::path(
+    post,
+    tag = "Text Generation Inference",
+    path = "/tokenize",
+    request_body = GenerateRequest,
+    responses(
+    (status = 200, description = "Tokenized ids", body = TokenizeResponse),
+    (status = 404, description = "No tokenizer found", body = ErrorResponse,
+    example = json ! ({"error": "No fast tokenizer available"})),
+    )
+    )]
+#[instrument(skip_all)]
+async fn tokenize(
+    Extension(infer): Extension<Infer>,
+    Json(req): Json<GenerateRequest>,
+) -> Result<Json<TokenizeResponse>, (StatusCode, Json<ErrorResponse>)> {
+    // Keep a copy of the raw text: `req` is moved into the tokenizer call below.
+    let source_text = req.inputs.clone();
+    let encoding = match infer.tokenize(req).await? {
+        Some(encoding) => encoding,
+        None => {
+            return Err((
+                StatusCode::NOT_FOUND,
+                Json(ErrorResponse {
+                    error: "No fast tokenizer or tokenizer.json for this model".to_string(),
+                    error_type: "no fast tokenizer".to_string(),
+                }),
+            ));
+        }
+    };
+    // Pair each token id with its (start, stop) offsets and cut the matching text.
+    // NOTE(review): offsets are applied as char positions; confirm the tokenizer's unit.
+    let ids = encoding.get_ids().iter();
+    let tokens: Vec<SimpleToken> = ids
+        .zip(encoding.get_offsets())
+        .map(|(&id, &(start, stop))| SimpleToken {
+            id,
+            text: source_text.chars().skip(start).take(stop - start).collect(),
+            start,
+            stop,
+        })
+        .collect();
+    Ok(Json(TokenizeResponse(tokens)))
 }
 
 /// Prometheus metrics scrape endpoint
@@ -510,6 +1136,9 @@ async fn metrics(prom_handle: Extension<PrometheusHandle>) -> String {
     prom_handle.render()
 }
 
+#[derive(Clone, Debug)]
+pub(crate) struct ComputeType(String); // string emitted in the `x-compute-type` response header
+
 /// Serving method
 #[allow(clippy::too_many_arguments)]
 pub async fn run(
@@ -526,14 +1155,19 @@ pub async fn run(
     max_batch_prefill_tokens: u32,
     max_batch_total_tokens: u32,
     max_waiting_tokens: usize,
+    max_batch_size: Option<usize>,
     client: ShardedClient,
     tokenizer: Option<Tokenizer>,
+    config: Option<Config>,
     validation_workers: usize,
     addr: SocketAddr,
     allow_origin: Option<AllowOrigin>,
     ngrok: bool,
     ngrok_authtoken: Option<String>,
     ngrok_edge: Option<String>,
+    tokenizer_config: HubTokenizerConfig,
+    messages_api_enabled: bool,
+    grammar_support: bool,
 ) -> Result<(), axum::BoxError> {
     // OpenAPI documentation
     #[derive(OpenApi)]
@@ -544,6 +1178,9 @@ pub async fn run(
     compat_generate,
     generate,
     generate_stream,
+    chat_completions,
+    completions,
+    tokenize,
     metrics,
     ),
     components(
@@ -551,16 +1188,40 @@ pub async fn run(
     Info,
     CompatGenerateRequest,
     GenerateRequest,
+    GrammarType,
+    ChatRequest,
+    Message,
+    ChatCompletionComplete,
+    ChatCompletionChoice,
+    ChatCompletionDelta,
+    ChatCompletionChunk,
+    ChatCompletionLogprob,
+    ChatCompletionLogprobs,
+    ChatCompletionTopLogprob,
+    ChatCompletion,
+    CompletionRequest,
+    CompletionComplete,
+    CompletionCompleteChunk,
     GenerateParameters,
     PrefillToken,
     Token,
     GenerateResponse,
+    TokenizeResponse,
+    SimpleToken,
     BestOfSequence,
     Details,
     FinishReason,
     StreamResponse,
     StreamDetails,
     ErrorResponse,
+    GrammarType,
+    Usage,
+    DeltaToolCall,
+    ToolType,
+    Tool,
+    ToolCall,
+    Function,
+    FunctionDefinition,
     )
     ),
     tags(
@@ -580,11 +1241,13 @@ pub async fn run(
     let validation = Validation::new(
         validation_workers,
         tokenizer,
+        config,
         max_best_of,
         max_stop_sequences,
         max_top_n_tokens,
         max_input_length,
         max_total_tokens,
+        grammar_support,
     );
     let generation_health = Arc::new(AtomicBool::new(false));
     let health_ext = Health::new(client.clone(), generation_health.clone());
@@ -595,12 +1258,15 @@ pub async fn run(
         max_batch_prefill_tokens,
         max_batch_total_tokens,
         max_waiting_tokens,
+        max_batch_size,
         max_concurrent_requests,
         shard_info.requires_padding,
         max_input_length as u32,
         max_total_tokens as u32,
         shard_info.window_size,
+        shard_info.speculate,
         generation_health,
+        tokenizer_config,
     );
 
     // Duration buckets
@@ -632,6 +1298,9 @@ pub async fn run(
     // Batch size buckets
     let batch_size_matcher = Matcher::Full(String::from("tgi_batch_next_size"));
     let batch_size_buckets: Vec<f64> = (0..1024).map(|x| (x + 1) as f64).collect();
+    // Speculated tokens buckets
+    let skipped_matcher = Matcher::Full(String::from("tgi_request_skipped_tokens"));
+    let skipped_buckets: Vec<f64> = (0..shard_info.speculate + 1).map(|x| x as f64).collect();
 
     // Prometheus handler
     let builder = PrometheusBuilder::new()
@@ -644,6 +1313,8 @@ pub async fn run(
         .set_buckets_for_metric(max_new_tokens_matcher, &max_new_tokens_buckets)
         .unwrap()
         .set_buckets_for_metric(batch_size_matcher, &batch_size_buckets)
+        .unwrap()
+        .set_buckets_for_metric(skipped_matcher, &skipped_buckets)
         .unwrap();
     let prom_handle = builder
         .install_recorder()
@@ -671,34 +1342,91 @@ pub async fn run(
         waiting_served_ratio,
         max_batch_total_tokens,
         max_waiting_tokens,
+        max_batch_size,
         validation_workers,
         version: env!("CARGO_PKG_VERSION"),
         sha: option_env!("VERGEN_GIT_SHA"),
         docker_label: option_env!("DOCKER_LABEL"),
     };
 
-    // Create router
-    let app = Router::new()
-        .merge(SwaggerUi::new("/docs").url("/api-doc/openapi.json", ApiDoc::openapi()))
-        // Base routes
+    // Define VertextApiDoc conditionally only if the "google" feature is enabled
+    let doc = {
+        // avoid `mut` if possible
+        #[cfg(feature = "google")]
+        {
+            use crate::VertexInstance;
+
+            #[derive(OpenApi)]
+            #[openapi(
+                paths(vertex_compatibility),
+                components(schemas(VertexInstance, VertexRequest, VertexResponse))
+            )]
+            struct VertextApiDoc;
+
+            // limiting mutability to the smallest scope necessary
+            let mut doc = ApiDoc::openapi();
+            doc.merge(VertextApiDoc::openapi());
+            doc
+        }
+        #[cfg(not(feature = "google"))]
+        ApiDoc::openapi()
+    };
+
+    // Configure Swagger UI
+    let swagger_ui = SwaggerUi::new("/docs").url("/api-doc/openapi.json", doc);
+
+    // Define base and health routes
+    let base_routes = Router::new()
         .route("/", post(compat_generate))
+        .route("/", get(health))
         .route("/info", get(get_model_info))
         .route("/generate", post(generate))
         .route("/generate_stream", post(generate_stream))
-        // AWS Sagemaker route
-        .route("/invocations", post(compat_generate))
-        // Base Health route
+        .route("/v1/chat/completions", post(chat_completions))
+        .route("/v1/completions", post(completions))
+        .route("/vertex", post(vertex_compatibility))
+        .route("/tokenize", post(tokenize))
         .route("/health", get(health))
-        // Inference API health route
-        .route("/", get(health))
-        // AWS Sagemaker health route
         .route("/ping", get(health))
-        // Prometheus metrics route
-        .route("/metrics", get(metrics))
+        .route("/metrics", get(metrics));
+
+    // Conditional AWS Sagemaker route
+    let aws_sagemaker_route = if messages_api_enabled {
+        Router::new().route("/invocations", post(chat_completions)) // Use 'chat_completions' for OAI_ENABLED
+    } else {
+        Router::new().route("/invocations", post(compat_generate)) // Use 'compat_generate' otherwise
+    };
+
+    let compute_type =
+        ComputeType(std::env::var("COMPUTE_TYPE").unwrap_or("gpu+optimized".to_string()));
+
+    // Combine routes and layers
+    let mut app = Router::new()
+        .merge(swagger_ui)
+        .merge(base_routes)
+        .merge(aws_sagemaker_route);
+
+    #[cfg(feature = "google")]
+    {
+        tracing::info!("Built with `google` feature");
+        tracing::info!(
+            "Environment variables `AIP_PREDICT_ROUTE` and `AIP_HEALTH_ROUTE` will be respected."
+        );
+        if let Ok(env_predict_route) = std::env::var("AIP_PREDICT_ROUTE") {
+            app = app.route(&env_predict_route, post(vertex_compatibility));
+        }
+        if let Ok(env_health_route) = std::env::var("AIP_HEALTH_ROUTE") {
+            app = app.route(&env_health_route, get(health));
+        }
+    }
+
+    // add layers after routes
+    app = app
         .layer(Extension(info))
         .layer(Extension(health_ext.clone()))
         .layer(Extension(compat_return_full_text))
         .layer(Extension(infer))
+        .layer(Extension(compute_type))
         .layer(Extension(prom_handle.clone()))
         .layer(OtelAxumLayer::default())
         .layer(cors_layer);
@@ -814,6 +1542,7 @@ impl From<InferError> for (StatusCode, Json<ErrorResponse>) {
             InferError::Overloaded(_) => StatusCode::TOO_MANY_REQUESTS,
             InferError::ValidationError(_) => StatusCode::UNPROCESSABLE_ENTITY,
             InferError::IncompleteGeneration => StatusCode::INTERNAL_SERVER_ERROR,
+            InferError::TemplateError(_) => StatusCode::UNPROCESSABLE_ENTITY,
         };
 
         (
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 0ec93109..4f3036e8 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -1,17 +1,26 @@
 /// Copyright (C) 2024 Habana Labs, Ltd. an Intel Company.
 
+use crate::config::Config;
 /// Payload validation logic
 use crate::validation::ValidationError::{BestOfSampling, BestOfSeed, EmptyInput};
-use crate::{GenerateParameters, GenerateRequest};
+use crate::{GenerateParameters, GenerateRequest, GrammarType};
+use jsonschema::{Draft, JSONSchema};
 use rand::{thread_rng, Rng};
 use std::env;
-use text_generation_client::{NextTokenChooserParameters, StoppingCriteriaParameters};
+use serde_json::Value;
+use std::io::Cursor;
+use text_generation_client::{
+    GrammarType as ProtoGrammarType, NextTokenChooserParameters, StoppingCriteriaParameters,
+};
 use thiserror::Error;
 use tokenizers::tokenizer::Tokenizer;
-use tokenizers::TruncationDirection;
+// use tokenizers::TruncationDirection;
+use base64::{engine::general_purpose::STANDARD, Engine};
+use image::{io::Reader as ImageReader, ImageFormat};
 use tokio::sync::mpsc;
 use tokio::sync::oneshot;
 use tracing::{instrument, Span};
+use {once_cell::sync::Lazy, regex::Regex};
 
 /// Validation
 #[derive(Debug, Clone)]
@@ -22,20 +31,24 @@ pub struct Validation {
     max_top_n_tokens: u32,
     max_input_length: usize,
     max_total_tokens: usize,
+    disable_grammar_support: bool,
     /// Channel to communicate with the background tokenization task
     sender: Option<mpsc::UnboundedSender<TokenizerRequest>>,
     skip_tokenizer_in_tgi: bool,
 }
 
 impl Validation {
+    #[allow(clippy::too_many_arguments)]
     pub(crate) fn new(
         workers: usize,
         tokenizer: Option<Tokenizer>,
+        config: Option<Config>,
         max_best_of: usize,
         max_stop_sequences: usize,
         max_top_n_tokens: u32,
         max_input_length: usize,
         max_total_tokens: usize,
+        disable_grammar_support: bool,
     ) -> Self {
         // If we have a fast tokenizer
         let sender = if let Some(tokenizer) = tokenizer {
@@ -46,12 +59,13 @@ impl Validation {
             // Create workers
             for _ in 0..workers {
                 let tokenizer_clone = tokenizer.clone();
+                let config_clone = config.clone();
                 let (tokenizer_sender, tokenizer_receiver) = mpsc::unbounded_channel();
                 senders.push(tokenizer_sender);
 
                 // Spawn worker
                 tokio::task::spawn_blocking(move || {
-                    tokenizer_worker(tokenizer_clone, tokenizer_receiver)
+                    tokenizer_worker(tokenizer_clone, config_clone, tokenizer_receiver)
                 });
             }
 
@@ -74,17 +88,17 @@ impl Validation {
             max_top_n_tokens,
             max_input_length,
             max_total_tokens,
+            disable_grammar_support,
             skip_tokenizer_in_tgi,
         }
     }
 
     #[instrument(skip(self, inputs))]
-    async fn validate_input(
+    pub async fn tokenize(
         &self,
         inputs: String,
         truncate: Option<usize>,
-        max_new_tokens: Option<u32>,
-    ) -> Result<(String, usize, u32), ValidationError> {
+    ) -> Result<Option<(tokenizers::Encoding, String)>, ValidationError> {
         // If we have a fast tokenizer
         if let Some(sender) = &self.sender {
             // Create response channel
@@ -97,8 +111,23 @@ impl Validation {
 
             // Await on response channel
             // Unwrap is safe here
-            let (inputs, _) = response_receiver.await.unwrap()?;
+            let encoding = response_receiver.await.unwrap()?;
+            Ok(Some(encoding))
+        } else {
+            Ok(None)
+        }
+    }
 
+    #[instrument(skip(self, inputs))]
+    async fn validate_input(
+        &self,
+        inputs: String,
+        truncate: Option<usize>,
+        max_new_tokens: Option<u32>,
+    ) -> Result<(String, usize, u32), ValidationError> {
+        // If we have a fast tokenizer
+        if let Some((encoding, inputs)) = self.tokenize(inputs.clone(), truncate).await? {
+            // Compute the input length
             let input_length = if self.skip_tokenizer_in_tgi {
                 inputs.chars().filter(|&c| c == ',').count() + 1
             } else {
@@ -173,6 +202,7 @@ impl Validation {
             best_of,
             temperature,
             repetition_penalty,
+            frequency_penalty,
             top_k,
             top_p,
             typical_p,
@@ -184,6 +214,7 @@ impl Validation {
             watermark,
             decoder_input_details,
             top_n_tokens,
+            grammar,
             ..
         } = request.parameters;
 
@@ -209,12 +240,17 @@ impl Validation {
             return Err(ValidationError::RepetitionPenalty);
         }
 
+        let frequency_penalty = frequency_penalty.unwrap_or(0.0);
+        if !(-2.0..=2.0).contains(&frequency_penalty) {
+            return Err(ValidationError::FrequencyPenalty);
+        }
+
         // TODO: enable watermark with fp8 quantization
         let quantization_enabled = env::var("QUANT_CONFIG")
             .ok()
             .map_or(false, |value| !value.is_empty());
         if watermark && quantization_enabled {
-            return Err(ValidationError::WatermarkWithQuantization)
+            return Err(ValidationError::WatermarkWithQuantization);
         }
 
         // Different because the proto default value is not a valid value
@@ -297,15 +333,61 @@ impl Validation {
             .validate_input(request.inputs, truncate, max_new_tokens)
             .await?;
 
+        // TODO: we should build the FSM here and pass the compiled FSM instead of the grammar
+        // NOTE: this is currently difficult because we need the tokenizer in Python to build
+        // the FSM and we'd have to load a copy of the tokenizer into our Pyo3 instance which
+        // may be slow and memory intensive. Best case is to have a Rust implementation of the FSM
+        // compiler and use that to build the FSM here.
+
+        // Validate grammar and unpack the grammar and type for the proto message
+        let (grammar, grammar_type) = match grammar {
+            Some(grammar) => {
+                // Ensure that grammar is not set if it's not supported
+                if self.disable_grammar_support {
+                    return Err(ValidationError::Grammar);
+                }
+                match grammar {
+                    GrammarType::Json(json) => {
+                        let json = match json {
+                            // if value is a string, we need to parse it again to make sure its
+                            // a valid json
+                            Value::String(s) => serde_json::from_str(&s)
+                                .map_err(|e| ValidationError::InvalidGrammar(e.to_string())),
+                            Value::Object(_) => Ok(json),
+                            _ => Err(ValidationError::Grammar),
+                        }?;
+
+                        // Check if the json is a valid JSONSchema
+                        JSONSchema::options()
+                            .with_draft(Draft::Draft202012)
+                            .compile(&json)
+                            .map_err(|e| ValidationError::InvalidGrammar(e.to_string()))?;
+
+                        (
+                            // Serialize json to string
+                            serde_json::to_string(&json)
+                                .map_err(|e| ValidationError::InvalidGrammar(e.to_string()))?,
+                            ProtoGrammarType::Json.into(),
+                        )
+                    }
+                    GrammarType::Regex(regex) => (regex, ProtoGrammarType::Regex.into()),
+                }
+            }
+            None => (String::new(), ProtoGrammarType::None.into()),
+        };
+
         let parameters = NextTokenChooserParameters {
             temperature,
             repetition_penalty,
+            frequency_penalty,
             top_k,
             top_p,
             typical_p,
             do_sample,
             seed,
             watermark,
+            grammar,
+            grammar_type,
         };
         let stopping_parameters = StoppingCriteriaParameters {
             max_new_tokens,
@@ -357,53 +439,148 @@ async fn round_robin_task(
 }
 
 /// Start tokenization workers
-fn tokenizer_worker(tokenizer: Tokenizer, mut receiver: mpsc::UnboundedReceiver<TokenizerRequest>) {
+fn tokenizer_worker(
+    tokenizer: Tokenizer,
+    config: Option<Config>, // optional `Config`, passed by reference to `prepare_input`
+    mut receiver: mpsc::UnboundedReceiver<TokenizerRequest>,
+) {
     // Loop over requests
     while let Some(((inputs, truncate), response_tx, parent_span)) = receiver.blocking_recv() {
         parent_span.in_scope(|| {
             response_tx
-                .send(prepare_input(inputs, truncate, &tokenizer))
+                .send(prepare_input(inputs, truncate, &tokenizer, &config))
                 .unwrap_or(())
         })
     }
 }
 
+/// Map an image MIME type to the matching `ImageFormat`; `None` for any other type.
+fn format_from_mimetype(mimetype: &str) -> Option<ImageFormat> {
+    match mimetype {
+        "image/png" => Some(ImageFormat::Png),
+        "image/jpeg" | "image/jpg" => Some(ImageFormat::Jpeg),
+        "image/gif" => Some(ImageFormat::Gif),
+        "image/webp" => Some(ImageFormat::WebP),
+        "image/tiff" => Some(ImageFormat::Tiff),
+        // "image/pnm"=>Some(ImageFormat::Pnm),
+        // "image/tga"=>Some(ImageFormat::Tga),
+        // "image/dds"=>Some(ImageFormat::Dds),
+        // "image/bmp"=>Some(ImageFormat::Bmp),
+        // "image/ico"=>Some(ImageFormat::Ico),
+        // "image/x-exr"=>Some(ImageFormat::OpenExr),
+        _ => None,
+    }
+}
+/// Name the MIME type of `format`; formats not listed map to `application/octet-stream`.
+fn format_to_mimetype(format: ImageFormat) -> String {
+    String::from(match format {
+        ImageFormat::Png => "image/png",
+        ImageFormat::Jpeg => "image/jpeg",
+        ImageFormat::Gif => "image/gif",
+        ImageFormat::WebP => "image/webp",
+        ImageFormat::Tiff => "image/tiff",
+        _ => "application/octet-stream",
+    })
+}
+
+/// Resolve a markdown image chunk (`![](http(s)://...)` or `![](data:<mime>;base64,<data>)`)
+/// into `(chunk with the image as a data URI, height, width)`; other input is rejected.
+fn fetch_image(input: &str) -> Result<(String, usize, usize), ValidationError> {
+    if input.starts_with("![](http://") || input.starts_with("![](https://") {
+        let url = &input["![](".len()..input.len() - 1];
+        // NOTE(review): user-supplied URL, fetched with no allow-list or size limit (SSRF risk).
+        let data = reqwest::blocking::get(url)?.bytes()?;
+        let format = image::guess_format(&data)?;
+        let img = ImageReader::with_format(Cursor::new(&data[..]), format).decode()?;
+        let height: usize = img.height().try_into()?;
+        let width: usize = img.width().try_into()?;
+        let mimetype = format_to_mimetype(format);
+        let encoded = STANDARD.encode(data);
+        let data_uri = format!("![](data:{mimetype};base64,{encoded})");
+        Ok((data_uri, height, width))
+    } else if input.starts_with("![](data:") {
+        // Strip the `![](data:` prefix and the trailing byte (the closing `)` of the chunk)
+        let content = &input["![](data:".len()..input.len() - 1];
+        let tokens: Vec<_> = content.split(';').collect();
+        if tokens.len() != 2 {
+            return Err(ValidationError::InvalidImageContent(content.to_string()));
+        }
+        let mimetype = tokens[0];
+        let content = tokens[1];
+        if !content.starts_with("base64,") {
+            return Err(ValidationError::InvalidImageContent(content.to_string()));
+        }
+
+        let data = STANDARD.decode(content["base64,".len()..].as_bytes())?;
+        let img = if let Some(format) = format_from_mimetype(mimetype) {
+            ImageReader::with_format(Cursor::new(data), format).decode()?
+        } else {
+            ImageReader::new(Cursor::new(data))
+                .with_guessed_format()
+                .map_err(|_io_error| ValidationError::InvalidImageContent(content.to_string()))?
+                .decode()?
+        };
+
+        let height: usize = img.height().try_into()?;
+        let width: usize = img.width().try_into()?;
+        Ok((input.to_string(), height, width))
+    } else {
+        Err(ValidationError::InvalidImageContent(input.to_string()))
+    }
+}
+
 /// Get input length and optionally truncate it
 fn prepare_input(
-    inputs: String,
-    truncate: Option<usize>,
+    mut inputs: String,
+    _truncate: Option<usize>,
     tokenizer: &Tokenizer,
-) -> Result<(String, usize), ValidationError> {
-    // Get the number of tokens in the input
-    let mut encoding = tokenizer
-        .encode(inputs.clone(), true)
-        .map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
-
-    // Optionally truncate
-    let (inputs, input_length) = match truncate {
-        // Truncate is some and < encoding length
-        Some(truncate) if truncate < encoding.len() => {
-            // truncate encoding and decode new inputs
-            encoding.truncate(truncate, 0, TruncationDirection::Left);
-            let inputs = tokenizer
-                .decode(encoding.get_ids(), false)
-                .map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
-            (inputs, encoding.len())
+    config: &Option<Config>,
+) -> Result<(tokenizers::Encoding, String), ValidationError> {
+    static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"!\[\]\([^\)]*\)").unwrap());
+    let tokenizer_query = match config {
+        Some(Config::LlavaNext(config)) => {
+            let mut modified_inputs = String::with_capacity(inputs.len());
+            let mut tokenizer_query = String::with_capacity(inputs.len());
+            let mut start = 0;
+            for chunk in RE.find_iter(&inputs) {
+                let chunk_start = chunk.start();
+                let chunk_end = chunk.end();
+                if chunk_start != start {
+                    modified_inputs.push_str(&inputs[start..chunk_start]);
+                    tokenizer_query.push_str(&inputs[start..chunk_start]);
+                }
+                let (image_uri, height, width) = fetch_image(&inputs[chunk_start..chunk_end])?;
+                let slots = config.get_number_of_features(height, width);
+                tokenizer_query.push_str(&"<image>".repeat(slots)); // one placeholder per feature slot
+                modified_inputs.push_str(&image_uri);
+                start = chunk_end;
+            }
+            if start != inputs.len() {
+                modified_inputs.push_str(&inputs[start..]); // text after the last image chunk
+                tokenizer_query.push_str(&inputs[start..]);
+            }
+            inputs = modified_inputs; // image chunks now carry the URIs returned by `fetch_image`
+            tokenizer_query
         }
-        // Nothing to do
-        _ => (inputs, encoding.len()),
+        Some(Config::Idefics) => RE.replace_all(&inputs, "<image>").into(),
+        _ => inputs.clone(),
     };
 
-    Ok((inputs, input_length))
+    // Tokenize the query (image chunks, if any, replaced by `<image>` placeholders)
+    let encoding = tokenizer
+        .encode(tokenizer_query, true)
+        .map_err(|err| ValidationError::Tokenizer(err.to_string()))?;
+
+    Ok((encoding, inputs))
 }
 
 type TokenizerRequest = (
     (String, Option<usize>),
-    oneshot::Sender<Result<(String, usize), ValidationError>>,
+    oneshot::Sender<Result<(tokenizers::Encoding, String), ValidationError>>, // reply channel for the `prepare_input` result
     Span,
 );
 
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub(crate) struct ValidGenerateRequest {
     pub inputs: String,
     pub input_length: u32,
@@ -436,6 +613,8 @@ pub enum ValidationError {
     Temperature,
     #[error("`repetition_penalty` must be strictly positive")]
     RepetitionPenalty,
+    #[error("`frequency_penalty` must be >= -2.0 and <= 2.0")]
+    FrequencyPenalty,
     #[error("`top_p` must be > 0.0 and < 1.0")]
     TopP,
     #[error("`top_k` must be strictly positive")]
@@ -460,6 +639,20 @@ pub enum ValidationError {
     StopSequence(usize, usize),
     #[error("tokenizer error {0}")]
     Tokenizer(String),
+    #[error("grammar is not supported")]
+    Grammar,
+    #[error("grammar is not valid: {0}")]
+    InvalidGrammar(String),
+    #[error("base64 encoding is invalid: {0}")]
+    InvalidBase64(#[from] base64::DecodeError),
+    #[error("invalid image: {0}")]
+    InvalidImage(#[from] image::ImageError),
+    #[error("invalid integer: {0}")]
+    InvalidInt(#[from] core::num::TryFromIntError),
+    #[error("invalid image content: {0}")]
+    InvalidImageContent(String),
+    #[error("Could not fetch image: {0}")]
+    FailedFetchImage(#[from] reqwest::Error),
     #[error("`watermark` = true is not allowed with FP8 quantization.")]
     WatermarkWithQuantization,
 }
@@ -479,14 +672,18 @@ mod tests {
         let max_input_length = 5;
         let max_total_tokens = 6;
         let workers = 1;
+        let disable_grammar_support = true;
+        let config = None;
         let validation = Validation::new(
             workers,
             tokenizer,
+            config,
             max_best_of,
             max_stop_sequence,
             max_top_n_tokens,
             max_input_length,
             max_total_tokens,
+            disable_grammar_support,
         );
 
         let max_new_tokens = 10;
@@ -495,7 +692,7 @@ mod tests {
             .await
         {
             Err(ValidationError::MaxNewTokens(1, 10)) => (),
-            _ => panic!("Unexpected not max new tokens"),
+            r => panic!("Unexpected not max new tokens: {r:?}"),
         }
     }
 
@@ -507,15 +704,19 @@ mod tests {
         let max_top_n_tokens = 4;
         let max_input_length = 5;
         let max_total_tokens = 6;
+        let disable_grammar_support = true;
         let workers = 1;
+        let config = None;
         let validation = Validation::new(
             workers,
             tokenizer,
+            config,
             max_best_of,
             max_stop_sequence,
             max_top_n_tokens,
             max_input_length,
             max_total_tokens,
+            disable_grammar_support,
         );
 
         let max_new_tokens = 10;
@@ -537,14 +738,18 @@ mod tests {
         let max_input_length = 5;
         let max_total_tokens = 6;
         let workers = 1;
+        let disable_grammar_support = true;
+        let config = None;
         let validation = Validation::new(
             workers,
             tokenizer,
+            config,
             max_best_of,
             max_stop_sequence,
             max_top_n_tokens,
             max_input_length,
             max_total_tokens,
+            disable_grammar_support,
         );
         match validation
             .validate(GenerateRequest {
@@ -569,22 +774,27 @@ mod tests {
         let max_stop_sequence = 3;
         let max_top_n_tokens = 4;
         let max_input_length = 5;
-        let max_total_tokens = 6;
+        let max_total_tokens = 106;
         let workers = 1;
+        let disable_grammar_support = true;
+        let config = None;
         let validation = Validation::new(
             workers,
             tokenizer,
+            config,
             max_best_of,
             max_stop_sequence,
             max_top_n_tokens,
             max_input_length,
             max_total_tokens,
+            disable_grammar_support,
         );
         match validation
             .validate(GenerateRequest {
                 inputs: "Hello".to_string(),
                 parameters: GenerateParameters {
                     top_p: Some(1.0),
+                    max_new_tokens: Some(5),
                     ..default_parameters()
                 },
             })
@@ -599,6 +809,7 @@ mod tests {
                 inputs: "Hello".to_string(),
                 parameters: GenerateParameters {
                     top_p: Some(0.99),
+                    max_new_tokens: Some(5),
                     ..default_parameters()
                 },
             })
@@ -613,6 +824,7 @@ mod tests {
                 inputs: "Hello".to_string(),
                 parameters: GenerateParameters {
                     top_p: None,
+                    max_new_tokens: Some(5),
                     ..default_parameters()
                 },
             })
@@ -629,22 +841,27 @@ mod tests {
         let max_stop_sequences = 3;
         let max_top_n_tokens = 4;
         let max_input_length = 5;
-        let max_total_tokens = 6;
+        let max_total_tokens = 106;
         let workers = 1;
+        let disable_grammar_support = true;
+        let config = None;
         let validation = Validation::new(
             workers,
             tokenizer,
+            config,
             max_best_of,
             max_stop_sequences,
             max_top_n_tokens,
             max_input_length,
             max_total_tokens,
+            disable_grammar_support,
         );
         match validation
             .validate(GenerateRequest {
                 inputs: "Hello".to_string(),
                 parameters: GenerateParameters {
                     top_n_tokens: Some(5),
+                    max_new_tokens: Some(5),
                     ..default_parameters()
                 },
             })
@@ -659,6 +876,7 @@ mod tests {
                 inputs: "Hello".to_string(),
                 parameters: GenerateParameters {
                     top_n_tokens: Some(4),
+                    max_new_tokens: Some(5),
                     ..default_parameters()
                 },
             })
@@ -670,6 +888,7 @@ mod tests {
                 inputs: "Hello".to_string(),
                 parameters: GenerateParameters {
                     top_n_tokens: Some(0),
+                    max_new_tokens: Some(5),
                     ..default_parameters()
                 },
             })
@@ -681,6 +900,7 @@ mod tests {
                 inputs: "Hello".to_string(),
                 parameters: GenerateParameters {
                     top_n_tokens: None,
+                    max_new_tokens: Some(5),
                     ..default_parameters()
                 },
             })
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index 313c018c..67982433 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -3,4 +3,4 @@
 # Branched from master on: 10 November, 2023
 # https://releases.rs/docs/1.75.0/
 channel = "1.75.0"
-components = ["rustfmt", "clippy"]
\ No newline at end of file
+components = ["rustfmt", "clippy"]
diff --git a/server/.gitignore b/server/.gitignore
index dcb8fe67..576746ee 100644
--- a/server/.gitignore
+++ b/server/.gitignore
@@ -161,3 +161,4 @@ flash-attention-v2/
 vllm/
 llm-awq/
 eetq/
+mamba/
diff --git a/server/Makefile b/server/Makefile
index 49001f6d..7e38eb12 100644
--- a/server/Makefile
+++ b/server/Makefile
@@ -3,6 +3,7 @@ include Makefile-flash-att-v2
 include Makefile-vllm
 include Makefile-awq
 include Makefile-eetq
+include Makefile-selective-scan
 
 unit-tests:
 	pytest -s -vv -m "not private" tests
@@ -18,8 +19,8 @@ gen-server:
 
 install: gen-server
 	pip install pip --upgrade
-	pip install -r requirements_cuda.txt
-	pip install -e ".[bnb, accelerate, quantize, peft]"
+	pip install -r requirements.txt
+	pip install -e "."
 
 run-dev:
 	SAFETENSORS_FAST_GPU=1 python -m torch.distributed.run --nproc_per_node=2 text_generation_server/cli.py serve bigscience/bloom-560m --sharded
@@ -32,4 +33,4 @@ update-lock:
 	poetry lock --no-update
 
 export-requirements:
-	poetry export -f requirements.txt --without-hashes --output requirements.txt
+	poetry export -o requirements.txt --without-hashes
diff --git a/server/Makefile-awq b/server/Makefile-awq
index 80e78c08..4e074a13 100644
--- a/server/Makefile-awq
+++ b/server/Makefile-awq
@@ -1,8 +1,10 @@
-awq_commit := f084f40bd996f3cf3a0633c1ad7d9d476c318aaa
+# Fork that adds only the correct stream to this kernel in order
+# to make cuda graphs work.
+awq_commit := bd1dc2d5254345cc76ab71894651fb821275bdd4
 
-awq: 
+awq:
 	rm -rf llm-awq
-	git clone https://github.com/mit-han-lab/llm-awq
+	git clone https://github.com/huggingface/llm-awq
 
 build-awq: awq
 	cd llm-awq/ && git fetch && git checkout $(awq_commit)
diff --git a/server/Makefile-eetq b/server/Makefile-eetq
index 5e8e9830..726e47b5 100644
--- a/server/Makefile-eetq
+++ b/server/Makefile-eetq
@@ -1,4 +1,4 @@
-eetq_commit := 323827dd471458a84e9c840f614e4592b157a4b1
+eetq_commit := 1657b1504faa359e2ce0ac02999439d7ac8c74c0
 
 eetq:
     # Clone eetq
@@ -6,7 +6,7 @@ eetq:
 	git clone https://github.com/NetEase-FuXi/EETQ.git eetq
 
 build-eetq: eetq
-	cd eetq && git fetch && git checkout $(eetq_commit)
+	cd eetq && git fetch && git checkout $(eetq_commit) && git submodule update --init --recursive
 	cd eetq && python setup.py build
 
 install-eetq: build-eetq
diff --git a/server/Makefile-flash-att b/server/Makefile-flash-att
index b4b2e40c..ffa304aa 100644
--- a/server/Makefile-flash-att
+++ b/server/Makefile-flash-att
@@ -13,4 +13,4 @@ build-flash-attention: flash-attention
 
 install-flash-attention: build-flash-attention
 	pip uninstall flash_attn rotary_emb dropout_layer_norm -y || true
-	cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install
\ No newline at end of file
+	cd flash-attention && python setup.py install && cd csrc/layer_norm && python setup.py install && cd ../rotary && python setup.py install
diff --git a/server/Makefile-flash-att-v2 b/server/Makefile-flash-att-v2
index 71c6cabe..803b3d1f 100644
--- a/server/Makefile-flash-att-v2
+++ b/server/Makefile-flash-att-v2
@@ -1,4 +1,4 @@
-flash_att_v2_commit_cuda := 02ac572f3ffc4f402e4183aaa6824b45859d3ed3
+flash_att_v2_commit_cuda := 23e8fa5a263d1c7122bc46a86ef32030ee7130f9
 flash_att_v2_commit_rocm := 8736558c287ff2ef28b24878e42828c595ac3e69
 
 
diff --git a/server/Makefile-selective-scan b/server/Makefile-selective-scan
new file mode 100644
index 00000000..b93b517d
--- /dev/null
+++ b/server/Makefile-selective-scan
@@ -0,0 +1,28 @@
+selective_scan_commit := 2a3704fd47ba817b415627b06fd796b971fdc137
+
+causal-conv1d:
+	rm -rf causal-conv1d
+	git clone https://github.com/Dao-AILab/causal-conv1d.git
+
+build-causal-conv1d: causal-conv1d
+	cd causal-conv1d/ && git checkout v1.1.1 # known latest working version tag
+	cd causal-conv1d/ && CAUSAL_CONV1D_FORCE_BUILD=TRUE python setup.py build
+
+install-causal-conv1d: build-causal-conv1d
+	pip uninstall causal-conv1d -y || true
+	cd causal-conv1d/ && pip install .
+
+# selective-scan depends on causal-conv1d
+selective-scan:
+	rm -rf mamba
+	git clone https://github.com/state-spaces/mamba.git mamba
+
+build-selective-scan: selective-scan
+	cd mamba/ && git fetch && git checkout $(selective_scan_commit)
+	cd mamba && python setup.py build
+
+install-selective-scan: install-causal-conv1d build-selective-scan
+	pip uninstall selective-scan-cuda -y || true
+	cd mamba && pip install .
+
+build-all: build-causal-conv1d build-selective-scan
diff --git a/server/Makefile-vllm b/server/Makefile-vllm
index ddb648ea..ada484a6 100644
--- a/server/Makefile-vllm
+++ b/server/Makefile-vllm
@@ -1,22 +1,25 @@
-build-vllm-cuda: REPOSITORY=https://github.com/vllm-project/vllm.git
-build-vllm-cuda: VLLM_COMMIT=f8a1e39fae05ca610be8d5a78be9d40f5274e5fc
-build-vllm-cuda: BRANCH=main
-build-vllm-cuda: build-vllm
-
-build-vllm-rocm: REPOSITORY=https://github.com/fxmarty/vllm-public.git
-build-vllm-rocm: VLLM_COMMIT=ad9b7c4095ef54419a0533d254f2ad84bd2dfcae
-build-vllm-rocm: BRANCH=rotary-no-positions-split-cos-sin
-build-vllm-rocm: build-vllm
-
-vllm:
+vllm-cuda:
     # Clone vllm
 	pip install -U ninja packaging --no-cache-dir
-	git clone --single-branch --branch $(BRANCH) $(REPOSITORY) vllm
+	git clone https://github.com/OlivierDehaene/vllm.git vllm
 
-build-vllm: vllm
-	cd vllm && git fetch && git checkout $(VLLM_COMMIT)
+build-vllm-cuda: vllm-cuda
+	cd vllm && git fetch && git checkout 4bec8cee87f6bb8cebaec297029713cd2082e0b2
 	cd vllm && python setup.py build
 
-install-vllm: build-vllm
+install-vllm-cuda: build-vllm-cuda
+	pip uninstall vllm -y || true
+	cd vllm && python setup.py install
+
+vllm-rocm:
+    # Clone vllm
+	pip install -U ninja packaging --no-cache-dir
+	git clone https://github.com/fxmarty/vllm-public.git vllm
+
+build-vllm-rocm: vllm-rocm
+	cd vllm && git fetch && git checkout ad9b7c4095ef54419a0533d254f2ad84bd2dfcae
+	cd vllm && python setup.py build
+
+install-vllm-rocm: build-vllm-rocm
 	pip uninstall vllm -y || true
 	cd vllm && python setup.py install
diff --git a/server/README.md b/server/README.md
index 8efd80ac..b8208f9e 100644
--- a/server/README.md
+++ b/server/README.md
@@ -12,4 +12,4 @@ make install
 
 ```shell
 make run-dev
-```
\ No newline at end of file
+```
diff --git a/server/custom_kernels/custom_kernels/fused_bloom_attention_cuda.cu b/server/custom_kernels/custom_kernels/fused_bloom_attention_cuda.cu
index 4be547b1..8206c3e0 100644
--- a/server/custom_kernels/custom_kernels/fused_bloom_attention_cuda.cu
+++ b/server/custom_kernels/custom_kernels/fused_bloom_attention_cuda.cu
@@ -247,4 +247,4 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
         &forward,
         "Bloom attention mechanism forward (CUDA)"
     );
-}
\ No newline at end of file
+}
diff --git a/server/dill-0.3.8-patch.sh b/server/dill-0.3.8-patch.sh
new file mode 100644
index 00000000..da263960
--- /dev/null
+++ b/server/dill-0.3.8-patch.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+git clone -b 0.3.8 https://github.com/uqfoundation/dill.git
+pushd dill
+cat <<EOF > dill-0.3.8.patch
+diff --git a/dill/_dill.py b/dill/_dill.py
+index d42432f..1d251e6 100644
+--- a/dill/_dill.py
++++ b/dill/_dill.py
+@@ -69,7 +69,15 @@ TypeType = type # 'new-style' classes #XXX: unregistered
+ XRangeType = range
+ from types import MappingProxyType as DictProxyType, new_class
+ from pickle import DEFAULT_PROTOCOL, HIGHEST_PROTOCOL, PickleError, PicklingError, UnpicklingError
+-import __main__ as _main_module
++class _LazyMainModule(object):
++    _module = None
++    @property
++    def module(self):
++        if self._module is None:
++            import __main__ as _m_module
++            self._module = _m_module
++        return self._module
++_main_module = _LazyMainModule()
+ import marshal
+ import gc
+ # import zlib
+@@ -355,7 +363,7 @@ class Pickler(StockPickler):
+         _fmode = kwds.pop('fmode', None)
+         _recurse = kwds.pop('recurse', None)
+         StockPickler.__init__(self, file, *args, **kwds)
+-        self._main = _main_module
++        self._main = _main_module.module
+         self._diff_cache = {}
+         self._byref = settings['byref'] if _byref is None else _byref
+         self._strictio = False #_strictio
+@@ -437,12 +445,12 @@ class Unpickler(StockUnpickler):
+         settings = Pickler.settings
+         _ignore = kwds.pop('ignore', None)
+         StockUnpickler.__init__(self, *args, **kwds)
+-        self._main = _main_module
++        self._main = _main_module.module
+         self._ignore = settings['ignore'] if _ignore is None else _ignore
+ 
+     def load(self): #NOTE: if settings change, need to update attributes
+         obj = StockUnpickler.load(self)
+-        if type(obj).__module__ == getattr(_main_module, '__name__', '__main__'):
++        if type(obj).__module__ == getattr(self._main, '__name__', '__main__'):
+             if not self._ignore:
+                 # point obj class to main
+                 try: obj.__class__ = getattr(self._main, type(obj).__name__)
+@@ -1199,11 +1207,11 @@ def save_module_dict(pickler, obj):
+         logger.trace(pickler, "D1: %s", _repr_dict(obj)) # obj
+         pickler.write(bytes('c__builtin__\n__main__\n', 'UTF-8'))
+         logger.trace(pickler, "# D1")
+-    elif (not is_dill(pickler, child=False)) and (obj == _main_module.__dict__):
++    elif (not is_dill(pickler, child=False)) and (obj == _main_module.module.__dict__):
+         logger.trace(pickler, "D3: %s", _repr_dict(obj)) # obj
+         pickler.write(bytes('c__main__\n__dict__\n', 'UTF-8'))  #XXX: works in general?
+         logger.trace(pickler, "# D3")
+-    elif '__name__' in obj and obj != _main_module.__dict__ \\
++    elif '__name__' in obj and obj != _main_module.module.__dict__ \\
+             and type(obj['__name__']) is str \\
+             and obj is getattr(_import_module(obj['__name__'],True), '__dict__', None):
+         logger.trace(pickler, "D4: %s", _repr_dict(obj)) # obj
+diff --git a/dill/session.py b/dill/session.py
+index e91068a..a921b43 100644
+--- a/dill/session.py
++++ b/dill/session.py
+@@ -233,7 +233,7 @@ def dump_module(
+     protocol = settings['protocol']
+     main = module
+     if main is None:
+-        main = _main_module
++        main = _main_module.module
+     elif isinstance(main, str):
+         main = _import_module(main)
+     if not isinstance(main, ModuleType):
+@@ -501,7 +501,7 @@ def load_module(
+             pass
+     assert loaded is main
+     _restore_modules(unpickler, main)
+-    if main is _main_module or main is module:
++    if main is _main_module.module or main is module:
+         return None
+     else:
+         return main
+
+EOF
+git apply dill-0.3.8.patch
+python -m pip install .
+popd
+rm -fr dill
\ No newline at end of file
diff --git a/server/exllama_kernels/exllama_kernels/cuda_compat.cuh b/server/exllama_kernels/exllama_kernels/cu_compat.cuh
similarity index 91%
rename from server/exllama_kernels/exllama_kernels/cuda_compat.cuh
rename to server/exllama_kernels/exllama_kernels/cu_compat.cuh
index 8dfa25de..c5258813 100644
--- a/server/exllama_kernels/exllama_kernels/cuda_compat.cuh
+++ b/server/exllama_kernels/exllama_kernels/cu_compat.cuh
@@ -43,12 +43,12 @@ __device__ __forceinline__ void atomicAdd_half2(half2* address, half2 val)
 
 //
 
-#if defined(__CUDA_ARCH__)
-#if __CUDA_ARCH__ < 700
+#if defined(__CUDA_ARCH__) || defined(USE_ROCM)
+#if __CUDA_ARCH__ < 700 || defined(USE_ROCM)
 
 __device__ __forceinline__ void atomicAdd(half* address, half val) { atomicAdd_half(address, val); }
 
-#if __CUDA_ARCH__ < 600
+#if __CUDA_ARCH__ < 600 || defined(USE_ROCM)
 __device__ __forceinline__ void atomicAdd(half2* address, half2 val) { atomicAdd_half2(address, val); }
 #endif
 
diff --git a/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh b/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh
index 6571c17d..0364e38c 100644
--- a/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh
+++ b/server/exllama_kernels/exllama_kernels/cuda_func/column_remap.cuh
@@ -16,4 +16,4 @@ void column_remap_cuda
     const uint32_t* x_map
 );
 
-#endif
\ No newline at end of file
+#endif
diff --git a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu
index 60dc4c9d..1b0f7956 100644
--- a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu
+++ b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cu
@@ -1,9 +1,13 @@
 #include "q4_matmul.cuh"
 #include "column_remap.cuh"
+#include <ATen/cuda/CUDAContext.h>
 #include "../util.cuh"
 #include "../matrix.cuh"
-#include "../cuda_compat.cuh"
+#include "../cu_compat.cuh"
 #include "../cuda_buffers.cuh"
+#if defined(USE_ROCM)
+#include "../hip_compat.cuh"
+#endif
 
 const int THREADS_X = 32;       // Block size and thread count along columns in w and out
 const int THREADS_Y = 1;        // Block size and thread count along rows in x and out
@@ -82,7 +86,7 @@ __global__ void q4_matmul_kernel
             if constexpr (use_half2)
             {
                 half2 w_scale = w_scales_.item_half2half2(group, w_column);
-                uint32_t w_zero = w_zeros_.item(group, w_column) + 1;
+                uint32_t w_zero = (w_zeros_.item(group, w_column) + 1) & 0x0F;
 
                 if constexpr (use_x_map) acc = dot_product_8_x_map(acc, x_, x_row, k, w_, k, w_column, w_scale, w_zero, groupsize / 8, x_map);
                 else                     acc = dot_product_8      (acc, x_, x_row, k, w_, k, w_column, w_scale, w_zero, groupsize / 8);
@@ -90,7 +94,7 @@ __global__ void q4_matmul_kernel
             else
             {
                 half w_scale = w_scales_.item(group, w_column);
-                uint32_t w_zero = w_zeros_.item(group, w_column) + 1;
+                uint32_t w_zero = (w_zeros_.item(group, w_column) + 1) & 0x0F;
 
                 if constexpr (use_x_map) acc_h = dot_product_8_x_map_h(acc_h, x_, x_row, k, w_, k, w_column, w_scale, w_zero, groupsize / 8, x_map);
                 else                     acc_h = dot_product_8_h      (acc_h, x_, x_row, k, w_, k, w_column, w_scale, w_zero, groupsize / 8);
@@ -107,7 +111,7 @@ __global__ void q4_matmul_kernel
             {
                 int group = k / groupsize;
                 half2 w_scale = w_scales_.item_half2half2(group, w_column);
-                uint32_t w_zero = w_zeros_.item(group, w_column) + 1;
+                uint32_t w_zero = (w_zeros_.item(group, w_column) + 1) & 0x0F;
 
                 if constexpr (use_x_map) acc = dot_product_8_x_map(acc, x_, x_row, k, w_, k, w_column, w_scale, w_zero, 1, x_map);
                 else                     acc = dot_product_8      (acc, x_, x_row, k, w_, k, w_column, w_scale, w_zero, 1);
@@ -116,7 +120,7 @@ __global__ void q4_matmul_kernel
             {
                 int group = k / groupsize;
                 half w_scale = w_scales_.item(group, w_column);
-                uint32_t w_zero = w_zeros_.item(group, w_column) + 1;
+                uint32_t w_zero = (w_zeros_.item(group, w_column) + 1) & 0x0F;
 
                 if constexpr (use_x_map) acc_h = dot_product_8_x_map_h(acc_h, x_, x_row, k, w_, k, w_column, w_scale, w_zero, 1, x_map);
                 else                     acc_h = dot_product_8_h      (acc_h, x_, x_row, k, w_, k, w_column, w_scale, w_zero, 1);
@@ -128,7 +132,7 @@ __global__ void q4_matmul_kernel
 
     if constexpr (use_half2)
     {
-        half result = __hadd(acc.x, acc.y);
+        half result = __hadd(__low2half(acc), __high2half(acc));
         atomicAdd(out_.item_ptr(x_row, w_column), result);
     }
     else
@@ -221,8 +225,8 @@ void q4_matmul_recons_cuda
     const int x_height,
     Q4Matrix* w,
     half* out,
-    const cublasHandle_t handle,
-    bool no_zero
+    bool no_zero,
+    const cublasHandle_t handle
 )
 {
     int height = x_height;
diff --git a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cuh b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cuh
index 63611790..4c7a6669 100644
--- a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cuh
+++ b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matmul.cuh
@@ -19,8 +19,8 @@ void q4_matmul_cuda
     const int x_height,
     const Q4Matrix* w,
     half* out,
-    bool no_zero = false,
-    cudaStream_t alt_stream = NULL
+    bool no_zero,
+    cudaStream_t alt_stream
 );
 
 void q4_matmul_recons_cuda
@@ -30,8 +30,8 @@ void q4_matmul_recons_cuda
     const int x_height,
     Q4Matrix* w,
     half* out,
-    const cublasHandle_t handle,
-    bool no_zero = false
+    bool no_zero,
+    const cublasHandle_t handle
 );
 
 #endif
diff --git a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cu b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cu
index f3d1564f..1f32e6b8 100644
--- a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cu
+++ b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cu
@@ -1,5 +1,6 @@
 // Adapted from turboderp exllama: https://github.com/turboderp/exllama
 
+#include <ATen/cuda/CUDAContext.h>
 #include "q4_matrix.cuh"
 #include <vector>
 #include "../util.cuh"
@@ -90,7 +91,7 @@ __global__ void make_sequential_kernel
         int w2_row_shift = w2_subrow << 2;
         int wnew2_row_shift = i << 2;
 
-        uint64_t src = w2[w2_row * w2_stride + w2_column];
+    uint64_t src = w2[w2_row * w2_stride + w2_column];
         src >>= w2_row_shift;
         src &= 0x0000000f0000000f;
         src <<= wnew2_row_shift;
@@ -146,7 +147,8 @@ void Q4Matrix::make_sequential(const uint32_t* cpu_g_idx)
     dim3 threads(UNSHUF_BLOCKSIZE_X, 1, 1);
     dim3 blocks(width / UNSHUF_BLOCKSIZE_X / 2, height / 8, 1);
 
-    make_sequential_kernel<<<blocks, threads>>>(cuda_qweight, cuda_new_qweight, cuda_x_map, height / 8, width);
+    const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+    make_sequential_kernel<<<blocks, threads, 0, stream>>>(cuda_qweight, cuda_new_qweight, cuda_x_map, height / 8, width);
 
     // Replace qweights
 
@@ -189,7 +191,7 @@ __global__ void reconstruct_kernel
     int group = row / groupsize;
 
     half w_scale = w_scales_.item(group, column);
-    uint32_t w_zero = w_zeros_.item(group, column) + 1;
+    uint32_t w_zero = (w_zeros_.item(group, column) + 1) & 0x0F;
 
     uint32_t w_read = w_.item_uint32_t(row, column);
     half* out_ptr = out_.item_ptr(row, column);
@@ -213,5 +215,6 @@ void Q4Matrix::reconstruct(half* out)
         1
     );
 
-    reconstruct_kernel<<<blocks, threads>>>(cuda_qweight, out, cuda_scales, cuda_qzeros, height / 8, width, groupsize);
-}
\ No newline at end of file
+    const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+    reconstruct_kernel<<<blocks, threads, 0, stream>>>(cuda_qweight, out, cuda_scales, cuda_qzeros, height / 8, width, groupsize);
+}
diff --git a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cuh b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cuh
index 50cb72a4..49431dc9 100644
--- a/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cuh
+++ b/server/exllama_kernels/exllama_kernels/cuda_func/q4_matrix.cuh
@@ -50,4 +50,4 @@ private:
 void g_q4_keep_matrix(Q4Matrix* m);
 void g_q4_free_matrices();
 
-#endif
\ No newline at end of file
+#endif
diff --git a/server/exllama_kernels/exllama_kernels/exllama_ext.cpp b/server/exllama_kernels/exllama_kernels/exllama_ext.cpp
index b786988b..f2df80e8 100644
--- a/server/exllama_kernels/exllama_kernels/exllama_ext.cpp
+++ b/server/exllama_kernels/exllama_kernels/exllama_ext.cpp
@@ -183,6 +183,7 @@ void q4_matmul
 
     int x_height = x.size(0);
 
+    const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     if (tuningParams.matmul_recons_thd == 0 || x_height < tuningParams.matmul_recons_thd)
     {
         q4_matmul_cuda
@@ -191,7 +192,9 @@ void q4_matmul
             (half*) x.data_ptr(),
             x_height,
             wm,
-            (half*) out.data_ptr()
+            (half*) out.data_ptr(),
+            false,
+            stream
         );
     }
     else
@@ -203,6 +206,7 @@ void q4_matmul
             x_height,
             wm,
             (half*) out.data_ptr(),
+            false,
             at::cuda::getCurrentCUDABlasHandle()
         );
     }
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/compat_gemm.cuh b/server/exllama_kernels/exllama_kernels/hip_compat.cuh
similarity index 68%
rename from server/exllamav2_kernels/exllamav2_kernels/cuda/compat_gemm.cuh
rename to server/exllama_kernels/exllama_kernels/hip_compat.cuh
index 19b1e4a6..5e698b1a 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/compat_gemm.cuh
+++ b/server/exllama_kernels/exllama_kernels/hip_compat.cuh
@@ -1,12 +1,23 @@
-#ifndef _compat_gemm_cuh
-#define _compat_gemm_cuh
+// Adapted from turboderp exllama: https://github.com/turboderp/exllama
 
-#if defined(USE_ROCM)
+#ifndef _hip_compat_cuh
+#define _hip_compat_cuh
 
-// For some reason this include is not present anywhere in exllama_v2 codebase, but it is required
-// for symbols as hipblasHalf.
-#include <hipblas/hipblas.h>
+// Workaround for a bug in hipamd, backported from upstream; this is fixed in ROCm 5.6.
+__device__ __forceinline__ __half __compat_hrcp(__half x) {
+    return __half_raw{
+        static_cast<_Float16>(__builtin_amdgcn_rcph(static_cast<__half_raw>(x).data))};
+}
 
+__device__ __forceinline__ __half2 __compat_h2rcp(__half2 x) {
+    return _Float16_2{static_cast<_Float16>(__builtin_amdgcn_rcph(x.x)),
+        static_cast<_Float16>(__builtin_amdgcn_rcph(x.y))};
+}
+
+#define hrcp __compat_hrcp
+#define h2rcp __compat_h2rcp
+
+// Automatic conversion of hipblasHgemm doesn't convert half to hipblasHalf.
 __host__ __forceinline__ hipblasStatus_t __compat_hipblasHgemm(hipblasHandle_t    handle,
                                                                hipblasOperation_t transA,
                                                                hipblasOperation_t transB,
@@ -31,8 +42,10 @@ __host__ __forceinline__ hipblasStatus_t __compat_hipblasHgemm(hipblasHandle_t
 #define hipblasHgemm __compat_hipblasHgemm
 
 // Previous version of PyTorch were converting to rocBLAS instead of hipBLAS.
+#define rocblas_handle hipblasHandle_t
 #define rocblas_operation_none HIPBLAS_OP_N
+#define rocblas_get_stream hipblasGetStream
+#define rocblas_set_stream hipblasSetStream
 #define rocblas_hgemm __compat_hipblasHgemm
-#endif
 
 #endif
diff --git a/server/exllama_kernels/exllama_kernels/util.cuh b/server/exllama_kernels/exllama_kernels/util.cuh
index 2839b10f..7b397573 100644
--- a/server/exllama_kernels/exllama_kernels/util.cuh
+++ b/server/exllama_kernels/exllama_kernels/util.cuh
@@ -8,7 +8,11 @@
 #include <cstdint>
 #include <cstdio>
 
+#if defined(USE_ROCM)
+#define cudaUnspecified hipErrorUnknown
+#else
 #define cudaUnspecified cudaErrorApiFailureBase
+#endif
 
 // React to failure on return code != cudaSuccess
 
diff --git a/server/exllamav2_kernels/exllamav2_kernels/config.h b/server/exllamav2_kernels/exllamav2_kernels/config.h
index 86baaf41..32a1a37d 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/config.h
+++ b/server/exllamav2_kernels/exllamav2_kernels/config.h
@@ -2,6 +2,7 @@
 #define _config_h
 
 #define MAX_Q_GEMM_ROWS 50
+#define MAX_Q_GEMM_WEIGHTS 4  // must be <= MAX_Q_GEMM_ROWS
 
 #define QMODE_2BIT 1
 #define QMODE_3BIT 1
@@ -10,4 +11,5 @@
 #define QMODE_6BIT 0
 #define QMODE_8BIT 0
 
+
 #endif
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/matrix_view.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/matrix_view.cuh
index 55af84f2..a72bc7bc 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/matrix_view.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/matrix_view.cuh
@@ -118,4 +118,4 @@ public:
     }
 };
 
-#endif
\ No newline at end of file
+#endif
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cu b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cu
index 351b9cd5..5b99f1ba 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cu
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cu
@@ -10,16 +10,19 @@
 #include "quant/qdq_6.cuh"
 #include "quant/qdq_8.cuh"
 
-#define BLOCK_KN_SIZE 128
-#define BLOCK_M_SIZE_MAX 8
-#define MAX_GROUPS_IN_BLOCK (BLOCK_KN_SIZE / 32)
+#define GPTQ_BLOCK_KN_SIZE 128
+#define GPTQ_BLOCK_M_SIZE_MAX 8
+#define GPTQ_MAX_GROUPS_IN_BLOCK (GPTQ_BLOCK_KN_SIZE / 32)
+
+#define EXL2_BLOCK_KN_SIZE 64
+#define EXL2_BLOCK_M_SIZE_MAX 8
+#define EXL2_MAX_GROUPS_IN_BLOCK (EXL2_BLOCK_KN_SIZE / 32)
+
 #define CLEAR_N_SIZE 256
 
 #include "q_gemm_kernel.cuh"
 #include "q_gemm_kernel_gptq.cuh"
 
-#include "compat_gemm.cuh"
-
 void gemm_half_q_half_cuda_part
 (
     const half* a,
@@ -29,22 +32,26 @@ void gemm_half_q_half_cuda_part
     int size_n,
     int size_k,
     int m_count,
-    bool clear
+    bool clear,
+    const half* r_weights,
+    int r_weights_stride,
+    bool mul_r_weights
 )
 {
+    const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     if (!b->is_gptq)
     {
         dim3 blockDim, gridDim;
-        blockDim.x = BLOCK_KN_SIZE;
+        blockDim.x = EXL2_BLOCK_KN_SIZE;
         blockDim.y = 1;
         blockDim.z = 1;
-        gridDim.x = DIVIDE(size_n, BLOCK_KN_SIZE * 4);
+        gridDim.x = DIVIDE(size_n, EXL2_BLOCK_KN_SIZE * 4);
         gridDim.y = DIVIDE(size_m, m_count);
-        gridDim.z = DIVIDE(size_k, BLOCK_KN_SIZE);
+        gridDim.z = DIVIDE(size_k, EXL2_BLOCK_KN_SIZE);
 
-        fp_gemm_half_q_half_kernel kernel = pick_gemm_half_q_half_kernel(true, m_count);
+        fp_gemm_half_q_half_kernel kernel = pick_gemm_half_q_half_kernel(m_count, r_weights != NULL, mul_r_weights);
 
-        kernel<<<gridDim, blockDim>>>
+        kernel<<<gridDim, blockDim, 0, stream>>>
         (
             a,
             b->cuda_q_weight,
@@ -55,7 +62,7 @@ void gemm_half_q_half_cuda_part
             size_n,
             size_k,
             b->groups,
-            b->groupsize,
+            b->cuda_q_group_map,
             b->cuda_q_perm,
             b->rows_8,
             b->rows_6,
@@ -63,26 +70,29 @@ void gemm_half_q_half_cuda_part
             b->rows_4,
             b->rows_3,
             b->rows_2,
-            clear
+            clear,
+            r_weights,
+            r_weights_stride
         );
     }
     else
     {
         dim3 blockDim, gridDim;
-        blockDim.x = BLOCK_KN_SIZE;
+        blockDim.x = GPTQ_BLOCK_KN_SIZE;
         blockDim.y = 1;
         blockDim.z = 1;
-        gridDim.x = DIVIDE(size_n, BLOCK_KN_SIZE * 4);
+        gridDim.x = DIVIDE(size_n, GPTQ_BLOCK_KN_SIZE * 4);
         gridDim.y = DIVIDE(size_m, m_count);
-        gridDim.z = DIVIDE(size_k, BLOCK_KN_SIZE);
+        gridDim.z = DIVIDE(size_k, GPTQ_BLOCK_KN_SIZE);
 
-        fp_gemm_half_q_half_gptq_kernel kernel = pick_gemm_half_q_half_gptq_kernel(true, m_count);
+        fp_gemm_half_q_half_gptq_kernel kernel = pick_gemm_half_q_half_gptq_kernel(m_count, r_weights != NULL, mul_r_weights);
 
-//         DBGX((uint64_t) b->cuda_q_perm);
-//         DBGI(b->rows_4);
-//         DBGI(b->height);
+//         DBGX((uint64_t) r_weights);
+//         if (r_weights)
+//             print_global_mem(r_weights, 1, 1, 1);
+//         DBGI(r_weights_stride);
 
-        kernel<<<gridDim, blockDim>>>
+        kernel<<<gridDim, blockDim, 0, stream>>>
         (
             a,
             b->cuda_q_weight,
@@ -93,10 +103,12 @@ void gemm_half_q_half_cuda_part
             size_n,
             size_k,
             b->groups,
-            b->groupsize,
+            b->gptq_groupsize,
             b->cuda_q_perm,
             b->rows_4,
-            clear
+            clear,
+            r_weights,
+            r_weights_stride
         );
     }
 }
@@ -112,13 +124,14 @@ void gemm_half_q_half_cuda
     int size_k,
     bool clear,
     half* temp_dq,
-    bool force_cuda
+    bool force_cuda,
+    const half* r_weights,
+    const int r_weights_stride,
+    bool mul_r_weights
 )
 {
     if (size_m > MAX_Q_GEMM_ROWS && !force_cuda)
     {
-        //printf("cublas\n");
-
         // Reconstruct FP16 matrix, then cuBLAS
 
         if (!temp_dq) temp_dq = b->temp_dq;
@@ -139,12 +152,12 @@ void gemm_half_q_half_cuda
         //const float alpha = 1.0f;
         //const float beta = clear ? 0.0f : 1.0f;
         //cublasSgemmEx(cublas_handle,
-        //              CUBLAS_OP_N,
-        //              CUBLAS_OP_N,
-        //              size_n, size_m, size_k,
-        //              &alpha, temp_dq, CUDA_R_16F, size_n,
-        //                      a,       CUDA_R_16F, size_k,
-        //              &beta,  c,       CUDA_R_16F, size_n);
+        //             CUBLAS_OP_N,
+        //             CUBLAS_OP_N,
+        //             size_n, size_m, size_k,
+        //             &alpha, temp_dq, CUDA_R_16F, size_n,
+        //                     a,       CUDA_R_16F, size_k,
+        //             &beta,  c,       CUDA_R_16F, size_n);
 
         //const float alpha = 1.0f;
         //const float beta = clear ? 0.0f : 1.0f;
@@ -158,24 +171,21 @@ void gemm_half_q_half_cuda
     }
     else
     {
-        //printf("cuda\n");
-
         // Quantized matmul
 
-        //if (clear) clear_tensor_cuda(c, size_m, size_n);
-
-        int max_chunks = size_m / BLOCK_M_SIZE_MAX;
-        int last_chunk = max_chunks * BLOCK_M_SIZE_MAX;
+        int block_m_size_max = b->is_gptq ? GPTQ_BLOCK_M_SIZE_MAX : EXL2_BLOCK_M_SIZE_MAX;
+        int max_chunks = size_m / block_m_size_max;
+        int last_chunk = max_chunks * block_m_size_max;
         int last_chunk_size = size_m - last_chunk;
 
         if (max_chunks)
         {
-            gemm_half_q_half_cuda_part(a, b, c, last_chunk, size_n, size_k, BLOCK_M_SIZE_MAX, clear);
+            gemm_half_q_half_cuda_part(a, b, c, last_chunk, size_n, size_k, block_m_size_max, clear, r_weights, r_weights_stride, mul_r_weights);
         }
 
         if (last_chunk_size)
         {
-            gemm_half_q_half_cuda_part(a + last_chunk * size_k, b, c + last_chunk * size_n, last_chunk_size, size_n, size_k, last_chunk_size, clear);
+            gemm_half_q_half_cuda_part(a + last_chunk * size_k, b, c + last_chunk * size_n, last_chunk_size, size_n, size_k, last_chunk_size, clear, r_weights, r_weights_stride, mul_r_weights);
         }
     }
 }
@@ -201,11 +211,10 @@ void clear_tensor_cuda
     int size_n
 )
 {
-    return;
-    dim3 blockDim, gridDim;
-    blockDim.x = CLEAR_N_SIZE;
-    blockDim.y = 1;
-    gridDim.x = DIVIDE(size_n / 8, CLEAR_N_SIZE);
-    gridDim.y = size_m;
-    clear_kernel<<<gridDim, blockDim>>>(c, size_m, size_n);
+//     dim3 blockDim, gridDim;
+//     blockDim.x = CLEAR_N_SIZE;
+//     blockDim.y = 1;
+//     gridDim.x = DIVIDE(size_n / 8, CLEAR_N_SIZE);
+//     gridDim.y = size_m;
+//     clear_kernel<<<gridDim, blockDim>>>(c, size_m, size_n);
 }
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cuh
index c69f1a70..e49457f3 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm.cuh
@@ -20,7 +20,10 @@ void gemm_half_q_half_cuda
     int size_k,
     bool clear = false,
     half* reconstruct = NULL,
-    bool force_cuda = false
+    bool force_cuda = false,
+    const half* r_weights = NULL,
+    const int r_weights_stride = 0,
+    bool mul_r_weights = false
 );
 
 void clear_tensor_cuda
@@ -30,4 +33,4 @@ void clear_tensor_cuda
     int size_n
 );
 
-#endif
\ No newline at end of file
+#endif
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel.cuh
index 0b899a84..9cd2ba01 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel.cuh
@@ -1,8 +1,5 @@
 #include "compat.cuh"
 
-#include <cuda_runtime.h>
-#include <cuda_fp16.h>
-
 __forceinline__ __device__ half2 dot22_8(half2(&dq)[4], const half* a_ptr, const half2 g_result, const half qs_h)
 {
     half2 result = {};
@@ -60,6 +57,47 @@ __forceinline__ __device__ float dot22_32_f(half2(&dq)[16], const half* a_ptr, c
     return fma(result_f, qs_f, g_result);
 }
 
+__forceinline__ __device__ half dot22_8_h(half2(&dq)[4], const half* a_ptr, const half g_result, const half qs_h)
+{
+    // Use FP32 accumulator to avoid potential overflow since unscaled weights are in the range -128..127
+
+    float result = {};
+    #pragma unroll
+    for (int i = 0; i < 4; i++)
+    {
+        half2 w01 = dq[i];
+        float w0 = __low2float(w01);
+        float w1 = __high2float(w01);
+        float x0 = __half2float(*a_ptr++);
+        float x1 = __half2float(*a_ptr++);
+        result = fma(w0, x0, result);
+        result = fma(w1, x1, result);
+    }
+    float qs = __half2float(qs_h);
+    result *= qs;
+    half result_h = __float2half_rn(result);
+    return __hadd(result_h, g_result);
+}
+
+__forceinline__ __device__ half dot22_16_h(half2(&dq)[8], const half* a_ptr, const half g_result, const half qs_h)
+{
+    half2 result = {};
+    const half2* a2_ptr = (const half2*)a_ptr;
+    #pragma unroll
+    for (int i = 0; i < 8; i++) result = __hfma2(dq[i], *a2_ptr++, result);
+    half result_h = __hadd(__low2half(result), __high2half(result));
+    return __hfma(result_h, qs_h, g_result);
+}
+
+__forceinline__ __device__ half dot22_32_h(half2(&dq)[16], const half* a_ptr, const half g_result, const half qs_h)
+{
+    half2 result = {};
+    const half2* a2_ptr = (const half2*)a_ptr;
+    #pragma unroll
+    for (int i = 0; i < 16; i += 1) result = __hfma2(dq[i], *a2_ptr++, result);
+    half result_h = __hadd(__low2half(result), __high2half(result));
+    return __hfma(result_h, qs_h, g_result);
+}
 
 
 typedef void (*fp_gemm_half_q_half_kernel)
@@ -73,7 +111,7 @@ typedef void (*fp_gemm_half_q_half_kernel)
     const int,
     const int,
     const int,
-    const int,
+    const uint16_t*,
     const uint16_t*,
     const int,
     const int,
@@ -81,10 +119,12 @@ typedef void (*fp_gemm_half_q_half_kernel)
     const int,
     const int,
     const int,
-    const bool
+    const bool,
+    const half*,
+    const int
 );
 
-template <bool first_block, int m_count>
+template <int m_count, bool use_r_weights, bool mul_r_weights>
 __global__ void gemm_half_q_half_kernel
 (
     const half*      __restrict__ a,
@@ -96,7 +136,7 @@ __global__ void gemm_half_q_half_kernel
     const int size_n,
     const int size_k,
     const int groups,
-    const int groupsize,
+    const uint16_t* __restrict__ b_q_group_map,
     const uint16_t* __restrict__ b_q_perm,
     const int rows_8,
     const int rows_6,
@@ -104,7 +144,9 @@ __global__ void gemm_half_q_half_kernel
     const int rows_4,
     const int rows_3,
     const int rows_2,
-    const bool clear
+    const bool clear,
+    const half* r_weights,
+    const int r_weights_stride
 )
 {
     MatrixView_half a_(a, size_m, size_k);
@@ -115,18 +157,34 @@ __global__ void gemm_half_q_half_kernel
 
     // Block
 
-    int offset_n = blockIdx.x * BLOCK_KN_SIZE * 4;
+    int offset_n = blockIdx.x * EXL2_BLOCK_KN_SIZE * 4;
     int offset_m = blockIdx.y * m_count;
-    int offset_k = blockIdx.z * BLOCK_KN_SIZE;
+    int offset_k = blockIdx.z * EXL2_BLOCK_KN_SIZE;
 
-    int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
+    int end_n = min(offset_n + EXL2_BLOCK_KN_SIZE * 4, size_n);
     int end_m = min(offset_m + m_count, size_m);
-    int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);
+    int end_k = min(offset_k + EXL2_BLOCK_KN_SIZE, size_k);
     int n = offset_n + t * 4;
 
+    // Read weights
+
+    half_uint16 weights[MAX_Q_GEMM_WEIGHTS];
+    if constexpr (use_r_weights)
+    {
+        uint16_t any_w = 0;
+        const half* w_ptr = r_weights;
+        for (int m = 0; m < m_count; ++m)
+        {
+            weights[m].as_half = *w_ptr;
+            w_ptr += r_weights_stride;
+            any_w |= weights[m].as_uint16;
+        }
+        if (!any_w) return;  // Early exit if all weights are zero -- does not zero output (!!!)
+    }
+
     // Preload block_a
 
-    __shared__ half block_a[m_count][BLOCK_KN_SIZE];
+    __shared__ half block_a[m_count][EXL2_BLOCK_KN_SIZE];
 
     if (offset_k + t < end_k)
     {
@@ -135,6 +193,7 @@ __global__ void gemm_half_q_half_kernel
             const half* a_ptr = a_.item_ptr(offset_m + m, 0);
             half* block_a_ptr = block_a[m];
             half a0 = a_ptr[b_q_perm[offset_k + t]];
+//            half a0 = a_ptr[offset_k + t];
             block_a_ptr[t] = a0;
         }
     }
@@ -153,14 +212,19 @@ __global__ void gemm_half_q_half_kernel
 
     // Find initial group
 
-    int group = offset_k / groupsize;
+    //int group = offset_k / groupsize;
+    int group = b_q_group_map[offset_k * 2];
+
+//    if (offset_m == 0 && t == 0)
+//        DBGI2(offset_k, group);
 
     // Preload scales
 
-    float scales[MAX_GROUPS_IN_BLOCK][4];
+    half scales[EXL2_MAX_GROUPS_IN_BLOCK][4];
 
-    int groups_in_block = DIVIDE((end_k - offset_k), groupsize);
-    for (int g = 0; g < groups_in_block; g++)
+    //int groups_in_block = DIVIDE((end_k - offset_k), groupsize);
+    int temp_k = offset_k;
+    for (int g = 0; temp_k < end_k; g++)
     {
         int qscales[4];
         b_q_scale_.item4(qscales, group + g, n);
@@ -168,11 +232,12 @@ __global__ void gemm_half_q_half_kernel
         qscales[1]++;
         qscales[2]++;
         qscales[3]++;
-        float maxscale = __half2float(b_q_scale_max[group + g]);
-        scales[g][0] = __int2float_rn(qscales[0] * qscales[0]) * maxscale;
-        scales[g][1] = __int2float_rn(qscales[1] * qscales[1]) * maxscale;
-        scales[g][2] = __int2float_rn(qscales[2] * qscales[2]) * maxscale;
-        scales[g][3] = __int2float_rn(qscales[3] * qscales[3]) * maxscale;
+        half maxscale = b_q_scale_max[group + g];
+        scales[g][0] = __hmul(__int2half_rn(qscales[0] * qscales[0]), maxscale);
+        scales[g][1] = __hmul(__int2half_rn(qscales[1] * qscales[1]), maxscale);
+        scales[g][2] = __hmul(__int2half_rn(qscales[2] * qscales[2]), maxscale);
+        scales[g][3] = __hmul(__int2half_rn(qscales[3] * qscales[3]), maxscale);
+        temp_k += b_q_group_map[temp_k * 2 + 1];
     }
 
     // a, b offset
@@ -193,20 +258,20 @@ __global__ void gemm_half_q_half_kernel
 
     const uint32_t* b_ptr = b_q_weight + qk * size_n + n;
     const half* a_ptr = &block_a[0][0];
-    int a_stride = BLOCK_KN_SIZE;
+    int a_stride = EXL2_BLOCK_KN_SIZE;
 
     // Initial group
 
     int scales_idx = 0;
-    float qs_f0 = scales[scales_idx][0];
-    float qs_f1 = scales[scales_idx][1];
-    float qs_f2 = scales[scales_idx][2];
-    float qs_f3 = scales[scales_idx][3];
-    int nextgroup = offset_k + groupsize;
+    half qs_h0 = scales[scales_idx][0];
+    half qs_h1 = scales[scales_idx][1];
+    half qs_h2 = scales[scales_idx][2];
+    half qs_h3 = scales[scales_idx][3];
+    int nextgroup = offset_k + b_q_group_map[offset_k * 2 + 1];
 
     // Column result
 
-    float block_c[m_count][4] = {};
+    half block_c[m_count][4] = {};
 
     // Dequantize groups
 
@@ -218,11 +283,11 @@ __global__ void gemm_half_q_half_kernel
         {
             group++;
             scales_idx++;
-            qs_f0 = scales[scales_idx][0];
-            qs_f1 = scales[scales_idx][1];
-            qs_f2 = scales[scales_idx][2];
-            qs_f3 = scales[scales_idx][3];
-            nextgroup += groupsize;
+            qs_h0 = scales[scales_idx][0];
+            qs_h1 = scales[scales_idx][1];
+            qs_h2 = scales[scales_idx][2];
+            qs_h3 = scales[scales_idx][3];
+            nextgroup += b_q_group_map[k * 2 + 1];
         }
 
         #pragma unroll
@@ -240,10 +305,11 @@ __global__ void gemm_half_q_half_kernel
 
             for (int m = 0; m < m_count; m++)
             {
-                block_c[m][0] = dot22_8_f(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_f0);
-                block_c[m][1] = dot22_8_f(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_f1);
-                block_c[m][2] = dot22_8_f(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_f2);
-                block_c[m][3] = dot22_8_f(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_f3);
+                if constexpr (use_r_weights) { if (!weights[m].as_uint16) continue; }
+                block_c[m][0] = dot22_8_h(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_h0);
+                block_c[m][1] = dot22_8_h(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_h1);
+                block_c[m][2] = dot22_8_h(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_h2);
+                block_c[m][3] = dot22_8_h(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_h3);
             }
             a_ptr += 8;
         }
@@ -256,11 +322,11 @@ __global__ void gemm_half_q_half_kernel
         {
             group++;
             scales_idx++;
-            qs_f0 = scales[scales_idx][0];
-            qs_f1 = scales[scales_idx][1];
-            qs_f2 = scales[scales_idx][2];
-            qs_f3 = scales[scales_idx][3];
-            nextgroup += groupsize;
+            qs_h0 = scales[scales_idx][0];
+            qs_h1 = scales[scales_idx][1];
+            qs_h2 = scales[scales_idx][2];
+            qs_h3 = scales[scales_idx][3];
+            nextgroup += b_q_group_map[k * 2 + 1];
         }
 
         #pragma unroll
@@ -279,10 +345,11 @@ __global__ void gemm_half_q_half_kernel
 
             for (int m = 0; m < m_count; m++)
             {
-                block_c[m][0] = dot22_16_f(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_f0);
-                block_c[m][1] = dot22_16_f(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_f1);
-                block_c[m][2] = dot22_16_f(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_f2);
-                block_c[m][3] = dot22_16_f(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_f3);
+                if constexpr (use_r_weights) { if (!weights[m].as_uint16) continue; }
+                block_c[m][0] = dot22_16_h(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_h0);
+                block_c[m][1] = dot22_16_h(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_h1);
+                block_c[m][2] = dot22_16_h(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_h2);
+                block_c[m][3] = dot22_16_h(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_h3);
             }
             a_ptr += 16;
         }
@@ -295,11 +362,11 @@ __global__ void gemm_half_q_half_kernel
         {
             group++;
             scales_idx++;
-            qs_f0 = scales[scales_idx][0];
-            qs_f1 = scales[scales_idx][1];
-            qs_f2 = scales[scales_idx][2];
-            qs_f3 = scales[scales_idx][3];
-            nextgroup += groupsize;
+            qs_h0 = scales[scales_idx][0];
+            qs_h1 = scales[scales_idx][1];
+            qs_h2 = scales[scales_idx][2];
+            qs_h3 = scales[scales_idx][3];
+            nextgroup += b_q_group_map[k * 2 + 1];
         }
 
         #pragma unroll
@@ -320,10 +387,11 @@ __global__ void gemm_half_q_half_kernel
 
             for (int m = 0; m < m_count; m++)
             {
-                block_c[m][0] = dot22_32_f(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_f0);
-                block_c[m][1] = dot22_32_f(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_f1);
-                block_c[m][2] = dot22_32_f(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_f2);
-                block_c[m][3] = dot22_32_f(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_f3);
+                if constexpr (use_r_weights) { if (!weights[m].as_uint16) continue; }
+                block_c[m][0] = dot22_32_h(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_h0);
+                block_c[m][1] = dot22_32_h(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_h1);
+                block_c[m][2] = dot22_32_h(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_h2);
+                block_c[m][3] = dot22_32_h(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_h3);
             }
             a_ptr += 32;
         }
@@ -337,11 +405,11 @@ __global__ void gemm_half_q_half_kernel
         {
             group++;
             scales_idx++;
-            qs_f0 = scales[scales_idx][0];
-            qs_f1 = scales[scales_idx][1];
-            qs_f2 = scales[scales_idx][2];
-            qs_f3 = scales[scales_idx][3];
-            nextgroup += groupsize;
+            qs_h0 = scales[scales_idx][0];
+            qs_h1 = scales[scales_idx][1];
+            qs_h2 = scales[scales_idx][2];
+            qs_h3 = scales[scales_idx][3];
+            nextgroup += b_q_group_map[k * 2 + 1];
         }
 
         #pragma unroll
@@ -358,10 +426,11 @@ __global__ void gemm_half_q_half_kernel
 
             for (int m = 0; m < m_count; m++)
             {
-                block_c[m][0] = dot22_8_f(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_f0);
-                block_c[m][1] = dot22_8_f(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_f1);
-                block_c[m][2] = dot22_8_f(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_f2);
-                block_c[m][3] = dot22_8_f(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_f3);
+                if constexpr (use_r_weights) { if (!weights[m].as_uint16) continue; }
+                block_c[m][0] = dot22_8_h(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_h0);
+                block_c[m][1] = dot22_8_h(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_h1);
+                block_c[m][2] = dot22_8_h(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_h2);
+                block_c[m][3] = dot22_8_h(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_h3);
             }
             a_ptr += 8;
         }
@@ -374,11 +443,11 @@ __global__ void gemm_half_q_half_kernel
         {
             group++;
             scales_idx++;
-            qs_f0 = scales[scales_idx][0];
-            qs_f1 = scales[scales_idx][1];
-            qs_f2 = scales[scales_idx][2];
-            qs_f3 = scales[scales_idx][3];
-            nextgroup += groupsize;
+            qs_h0 = scales[scales_idx][0];
+            qs_h1 = scales[scales_idx][1];
+            qs_h2 = scales[scales_idx][2];
+            qs_h3 = scales[scales_idx][3];
+            nextgroup += b_q_group_map[k * 2 + 1];
         }
 
         #pragma unroll
@@ -397,10 +466,11 @@ __global__ void gemm_half_q_half_kernel
 
             for (int m = 0; m < m_count; m++)
             {
-                block_c[m][0] = dot22_32_f(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_f0);
-                block_c[m][1] = dot22_32_f(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_f1);
-                block_c[m][2] = dot22_32_f(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_f2);
-                block_c[m][3] = dot22_32_f(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_f3);
+                if constexpr (use_r_weights) { if (!weights[m].as_uint16) continue; }
+                block_c[m][0] = dot22_32_h(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_h0);
+                block_c[m][1] = dot22_32_h(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_h1);
+                block_c[m][2] = dot22_32_h(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_h2);
+                block_c[m][3] = dot22_32_h(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_h3);
             }
             a_ptr += 32;
         }
@@ -413,15 +483,15 @@ __global__ void gemm_half_q_half_kernel
         {
             group++;
             scales_idx++;
-            qs_f0 = scales[scales_idx][0];
-            qs_f1 = scales[scales_idx][1];
-            qs_f2 = scales[scales_idx][2];
-            qs_f3 = scales[scales_idx][3];
-            nextgroup += groupsize;
+            qs_h0 = scales[scales_idx][0];
+            qs_h1 = scales[scales_idx][1];
+            qs_h2 = scales[scales_idx][2];
+            qs_h3 = scales[scales_idx][3];
+            nextgroup += b_q_group_map[k * 2 + 1];
         }
 
         #pragma unroll
-        for (int j = 0; j < 2; j++)
+        for (int j = 0; j < 1; j++)
         {
             int4 load_int4[1];
             load_int4[0] = *((int4*) b_ptr); b_ptr += size_n;
@@ -434,15 +504,16 @@ __global__ void gemm_half_q_half_kernel
 
             for (int m = 0; m < m_count; m++)
             {
-                block_c[m][0] = dot22_16_f(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_f0);
-                block_c[m][1] = dot22_16_f(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_f1);
-                block_c[m][2] = dot22_16_f(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_f2);
-                block_c[m][3] = dot22_16_f(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_f3);
+                if constexpr (use_r_weights) { if (!weights[m].as_uint16) continue; }
+                block_c[m][0] = dot22_16_h(dq[0], a_ptr + m * a_stride, block_c[m][0], qs_h0);
+                block_c[m][1] = dot22_16_h(dq[1], a_ptr + m * a_stride, block_c[m][1], qs_h1);
+                block_c[m][2] = dot22_16_h(dq[2], a_ptr + m * a_stride, block_c[m][2], qs_h2);
+                block_c[m][3] = dot22_16_h(dq[3], a_ptr + m * a_stride, block_c[m][3], qs_h3);
             }
 
             a_ptr += 16;
         }
-        k += 32;
+        k += 16;
     }
 
     // Accumulate column sums in c
@@ -450,38 +521,60 @@ __global__ void gemm_half_q_half_kernel
     for (int m = 0; m < m_count; m++)
     {
         half2* out = (half2*)c_.item_ptr(offset_m + m, n);
-        half2 result01 = __halves2half2(__float2half_rn(block_c[m][0]), __float2half_rn(block_c[m][1]));
-        half2 result23 = __halves2half2(__float2half_rn(block_c[m][2]), __float2half_rn(block_c[m][3]));
+        half2 result01 = __halves2half2(block_c[m][0], block_c[m][1]);
+        half2 result23 = __halves2half2(block_c[m][2], block_c[m][3]);
+
+        if constexpr (mul_r_weights)
+        {
+            half2 w_mul2 = __half2half2(weights[m].as_half);
+            result01 = __hmul2(result01, w_mul2);
+            result23 = __hmul2(result23, w_mul2);
+        }
+
         atomicAdd(out    , result01);
         atomicAdd(out + 1, result23);
+//        *out = result01;
+//        *(out + 1) = result23;
     }
 }
 
-fp_gemm_half_q_half_kernel pick_gemm_half_q_half_kernel(bool first_block, const int m_count)
+template <bool use_r_weights, bool mul_r_weights> struct map_m_count_exl2 {
+    static constexpr fp_gemm_half_q_half_kernel pick_gemm_half_q_half_kernel(const int m_count) {
+        switch (m_count) {  // resolves m_count to the kernel built for these flags; cases above EXL2_BLOCK_M_SIZE_MAX are compiled out
+        #if EXL2_BLOCK_M_SIZE_MAX >= 1
+        case 1: return gemm_half_q_half_kernel<1, use_r_weights, mul_r_weights>;
+        #endif
+        #if EXL2_BLOCK_M_SIZE_MAX >= 2
+        case 2: return gemm_half_q_half_kernel<2, use_r_weights, mul_r_weights>;
+        #endif
+        #if EXL2_BLOCK_M_SIZE_MAX >= 3
+        case 3: return gemm_half_q_half_kernel<3, use_r_weights, mul_r_weights>;
+        #endif
+        #if EXL2_BLOCK_M_SIZE_MAX >= 4
+        case 4: return gemm_half_q_half_kernel<4, use_r_weights, mul_r_weights>;
+        #endif
+        #if EXL2_BLOCK_M_SIZE_MAX >= 5
+        case 5: return gemm_half_q_half_kernel<5, use_r_weights, mul_r_weights>;
+        #endif
+        #if EXL2_BLOCK_M_SIZE_MAX >= 6
+        case 6: return gemm_half_q_half_kernel<6, use_r_weights, mul_r_weights>;
+        #endif
+        #if EXL2_BLOCK_M_SIZE_MAX >= 7
+        case 7: return gemm_half_q_half_kernel<7, use_r_weights, mul_r_weights>;
+        #endif
+        #if EXL2_BLOCK_M_SIZE_MAX >= 8
+        case 8: return gemm_half_q_half_kernel<8, use_r_weights, mul_r_weights>;
+        #endif
+        default: return NULL;  // no instantiation was compiled for this m_count
+        }
+    }
+};
+
+fp_gemm_half_q_half_kernel pick_gemm_half_q_half_kernel(const int m_count, bool r_weights, bool mul_r_weights)  // runtime flags + m_count -> kernel pointer, NULL if no instantiation matches
 {
-    #if BLOCK_M_SIZE_MAX >= 1
-    if (m_count == 1) return gemm_half_q_half_kernel<true, 1>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 2
-    if (m_count == 2) return gemm_half_q_half_kernel<true, 2>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 3
-    if (m_count == 3) return gemm_half_q_half_kernel<true, 3>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 4
-    if (m_count == 4) return gemm_half_q_half_kernel<true, 4>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 5
-    if (m_count == 5) return gemm_half_q_half_kernel<true, 5>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 6
-    if (m_count == 6) return gemm_half_q_half_kernel<true, 6>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 7
-    if (m_count == 7) return gemm_half_q_half_kernel<true, 7>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 8
-    if (m_count == 8) return gemm_half_q_half_kernel<true, 8>;
-    #endif
+    if (!r_weights && !mul_r_weights) return map_m_count_exl2<false, false>::pick_gemm_half_q_half_kernel(m_count);  // row weights are neither read nor applied
+    if (!r_weights &&  mul_r_weights) return map_m_count_exl2<false,  true>::pick_gemm_half_q_half_kernel(m_count);  // NOTE(review): the kernel fills weights[] only under use_r_weights, yet mul_r_weights reads it -- confirm no caller passes this combination
+    if ( r_weights && !mul_r_weights) return map_m_count_exl2< true, false>::pick_gemm_half_q_half_kernel(m_count);  // rows whose weight is zero are skipped
+    if ( r_weights &&  mul_r_weights) return map_m_count_exl2< true,  true>::pick_gemm_half_q_half_kernel(m_count);  // zero-weight rows skipped, remaining outputs multiplied by their weight
     return NULL;
 }
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel_gptq.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel_gptq.cuh
index ebaa42d0..f816fd9d 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel_gptq.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_gemm_kernel_gptq.cuh
@@ -18,6 +18,15 @@ __forceinline__ __device__ float dot22_8_f(half2(&dq)[4], const half* a_ptr)
     return __half2float(__low2half(result)) + __half2float(__high2half(result));
 }
 
+__forceinline__ __device__ half2 dot22_8_h2(half2(&dq)[4], const half* a_ptr)
+{
+    // Packed FP16 multiply-accumulate of the 4 weight pairs in dq against 8 halves at a_ptr; the two lanes are returned unreduced.
+    half2 acc = {};
+    #pragma unroll
+    for (int p = 0; p < 4; ++p) acc = __hfma2(dq[p], ((const half2*)a_ptr)[p], acc);
+    return acc;
+}
+
 typedef void (*fp_gemm_half_q_half_gptq_kernel)
 (
     const half*,
@@ -32,10 +41,12 @@ typedef void (*fp_gemm_half_q_half_gptq_kernel)
     const int,
     const uint16_t*,
     const int,
-    const bool
+    const bool,
+    const half*,
+    const int
 );
 
-template <bool first_block, int m_count>
+template <int m_count, bool use_r_weights, bool mul_r_weights>
 __global__ void gemm_half_q_half_gptq_kernel
 (
     const half* __restrict__ a,
@@ -50,7 +61,9 @@ __global__ void gemm_half_q_half_gptq_kernel
     const int groupsize,
     const uint16_t* __restrict__ b_q_perm,
     const int rows_4,
-    const bool clear
+    const bool clear,
+    const half* r_weights,
+    const int r_weights_stride
 )
 {
     MatrixView_half a_(a, size_m, size_k);
@@ -62,19 +75,35 @@ __global__ void gemm_half_q_half_gptq_kernel
 
     // Block
 
-    int offset_n = blockIdx.x * BLOCK_KN_SIZE * 4;
+    int offset_n = blockIdx.x * GPTQ_BLOCK_KN_SIZE * 4;
     int offset_m = blockIdx.y * m_count;
-    int offset_k = blockIdx.z * BLOCK_KN_SIZE;
+    int offset_k = blockIdx.z * GPTQ_BLOCK_KN_SIZE;
 
-    int end_n = min(offset_n + BLOCK_KN_SIZE * 4, size_n);
+    int end_n = min(offset_n + GPTQ_BLOCK_KN_SIZE * 4, size_n);
     int end_m = min(offset_m + m_count, size_m);
-    int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);
+    int end_k = min(offset_k + GPTQ_BLOCK_KN_SIZE, size_k);
 
     int n = offset_n + t * 4;
 
+    // Read weights
+
+    half_uint16 weights[MAX_Q_GEMM_WEIGHTS];
+    if constexpr (use_r_weights)
+    {
+        uint16_t any_w = 0;
+        const half* w_ptr = r_weights;
+        for (int m = 0; m < m_count; ++m)
+        {
+            weights[m].as_half = *w_ptr;
+            w_ptr += r_weights_stride;
+            any_w |= weights[m].as_uint16;
+        }
+        if (!any_w) return;  // Early exit if all weights are zero -- does not zero output (!!!)
+    }
+
     // Preload block_a
 
-    __shared__ half block_a[m_count][BLOCK_KN_SIZE];
+    __shared__ half block_a[m_count][GPTQ_BLOCK_KN_SIZE];
 
     if (offset_k + t < end_k)
     {
@@ -113,26 +142,26 @@ __global__ void gemm_half_q_half_gptq_kernel
 
     const uint32_t* b_ptr = b_q_weight + qk * size_n + n;
     const half* a_ptr = &block_a[0][0];
-    int a_stride = BLOCK_KN_SIZE;
+    int a_stride = GPTQ_BLOCK_KN_SIZE;
 
     // Initial group
 
     int zeros[4];
-    float scales[4];
+    half2 scales[4];
     half2 z1z16[4][2];
     half2 y1y16[4][2];
     b_gptq_qzeros_.item4(zeros, group, n);
-    b_gptq_scales_.item4_f(scales, group, n);
-    dequant_4bit_8_prep_zero(zeros[0] + 1, z1z16[0], y1y16[0]);
-    dequant_4bit_8_prep_zero(zeros[1] + 1, z1z16[1], y1y16[1]);
-    dequant_4bit_8_prep_zero(zeros[2] + 1, z1z16[2], y1y16[2]);
-    dequant_4bit_8_prep_zero(zeros[3] + 1, z1z16[3], y1y16[3]);
+    b_gptq_scales_.item4_h2(scales, group, n);
+    dequant_4bit_8_prep_zero((zeros[0] + 1) & 0x0F, z1z16[0], y1y16[0]);
+    dequant_4bit_8_prep_zero((zeros[1] + 1) & 0x0F, z1z16[1], y1y16[1]);
+    dequant_4bit_8_prep_zero((zeros[2] + 1) & 0x0F, z1z16[2], y1y16[2]);
+    dequant_4bit_8_prep_zero((zeros[3] + 1) & 0x0F, z1z16[3], y1y16[3]);
 
 //    __syncthreads();
 
     // Column result
 
-    float block_c[m_count][4] = {};
+    half2 block_c[m_count][4] = {};
 
     // Dequantize and multiply
 
@@ -144,11 +173,11 @@ __global__ void gemm_half_q_half_gptq_kernel
             group++;
             nextgroup += groupsize;
             b_gptq_qzeros_.item4(zeros, group, n);
-            b_gptq_scales_.item4_f(scales, group, n);
-            dequant_4bit_8_prep_zero(zeros[0] + 1, z1z16[0], y1y16[0]);
-            dequant_4bit_8_prep_zero(zeros[1] + 1, z1z16[1], y1y16[1]);
-            dequant_4bit_8_prep_zero(zeros[2] + 1, z1z16[2], y1y16[2]);
-            dequant_4bit_8_prep_zero(zeros[3] + 1, z1z16[3], y1y16[3]);
+            b_gptq_scales_.item4_h2(scales, group, n);
+            dequant_4bit_8_prep_zero((zeros[0] + 1) & 0x0F, z1z16[0], y1y16[0]);
+            dequant_4bit_8_prep_zero((zeros[1] + 1) & 0x0F, z1z16[1], y1y16[1]);
+            dequant_4bit_8_prep_zero((zeros[2] + 1) & 0x0F, z1z16[2], y1y16[2]);
+            dequant_4bit_8_prep_zero((zeros[3] + 1) & 0x0F, z1z16[3], y1y16[3]);
         }
 
         #pragma unroll
@@ -166,10 +195,11 @@ __global__ void gemm_half_q_half_gptq_kernel
             #pragma unroll
             for (int m = 0; m < m_count; m++)
             {
-                block_c[m][0] = fma(dot22_8_f(dq[0], a_ptr + m * a_stride), scales[0], block_c[m][0]);
-                block_c[m][1] = fma(dot22_8_f(dq[1], a_ptr + m * a_stride), scales[1], block_c[m][1]);
-                block_c[m][2] = fma(dot22_8_f(dq[2], a_ptr + m * a_stride), scales[2], block_c[m][2]);
-                block_c[m][3] = fma(dot22_8_f(dq[3], a_ptr + m * a_stride), scales[3], block_c[m][3]);
+                if constexpr (use_r_weights) { if (!weights[m].as_uint16) continue; }
+                block_c[m][0] = __hfma2(dot22_8_h2(dq[0], a_ptr + m * a_stride), scales[0], block_c[m][0]);
+                block_c[m][1] = __hfma2(dot22_8_h2(dq[1], a_ptr + m * a_stride), scales[1], block_c[m][1]);
+                block_c[m][2] = __hfma2(dot22_8_h2(dq[2], a_ptr + m * a_stride), scales[2], block_c[m][2]);
+                block_c[m][3] = __hfma2(dot22_8_h2(dq[3], a_ptr + m * a_stride), scales[3], block_c[m][3]);
             }
 
             b_ptr += size_n;
@@ -182,38 +212,62 @@ __global__ void gemm_half_q_half_gptq_kernel
     for (int m = 0; m < m_count; m++)
     {
         half2 *out = (half2*) c_.item_ptr(offset_m + m, n);
-        half2 result01 = __halves2half2(__float2half_rn(block_c[m][0]), __float2half_rn(block_c[m][1]));
-        half2 result23 = __halves2half2(__float2half_rn(block_c[m][2]), __float2half_rn(block_c[m][3]));
+        half result0 = __hadd(__low2half(block_c[m][0]), __high2half(block_c[m][0]));
+        half result1 = __hadd(__low2half(block_c[m][1]), __high2half(block_c[m][1]));
+        half result2 = __hadd(__low2half(block_c[m][2]), __high2half(block_c[m][2]));
+        half result3 = __hadd(__low2half(block_c[m][3]), __high2half(block_c[m][3]));
+        half2 result01 = __halves2half2(result0, result1);
+        half2 result23 = __halves2half2(result2, result3);
+
+        if constexpr (mul_r_weights)
+        {
+            half2 w_mul2 = __half2half2(weights[m].as_half);
+            result01 = __hmul2(result01, w_mul2);
+            result23 = __hmul2(result23, w_mul2);
+        }
+
         atomicAdd(out    , result01);
         atomicAdd(out + 1, result23);
     }
 }
 
-fp_gemm_half_q_half_gptq_kernel pick_gemm_half_q_half_gptq_kernel(bool first_block, const int m_count)
+template <bool use_r_weights, bool mul_r_weights> struct map_m_count_gptq {
+    static constexpr fp_gemm_half_q_half_gptq_kernel pick_gemm_half_q_half_gptq_kernel(int m_count) {
+        switch (m_count) {  // resolves m_count to the kernel built for these flags; cases above GPTQ_BLOCK_M_SIZE_MAX are compiled out
+        #if GPTQ_BLOCK_M_SIZE_MAX >= 1
+        case 1: return gemm_half_q_half_gptq_kernel<1, use_r_weights, mul_r_weights>;
+        #endif
+        #if GPTQ_BLOCK_M_SIZE_MAX >= 2
+        case 2: return gemm_half_q_half_gptq_kernel<2, use_r_weights, mul_r_weights>;
+        #endif
+        #if GPTQ_BLOCK_M_SIZE_MAX >= 3
+        case 3: return gemm_half_q_half_gptq_kernel<3, use_r_weights, mul_r_weights>;
+        #endif
+        #if GPTQ_BLOCK_M_SIZE_MAX >= 4
+        case 4: return gemm_half_q_half_gptq_kernel<4, use_r_weights, mul_r_weights>;
+        #endif
+        #if GPTQ_BLOCK_M_SIZE_MAX >= 5
+        case 5: return gemm_half_q_half_gptq_kernel<5, use_r_weights, mul_r_weights>;
+        #endif
+        #if GPTQ_BLOCK_M_SIZE_MAX >= 6
+        case 6: return gemm_half_q_half_gptq_kernel<6, use_r_weights, mul_r_weights>;
+        #endif
+        #if GPTQ_BLOCK_M_SIZE_MAX >= 7
+        case 7: return gemm_half_q_half_gptq_kernel<7, use_r_weights, mul_r_weights>;
+        #endif
+        #if GPTQ_BLOCK_M_SIZE_MAX >= 8
+        case 8: return gemm_half_q_half_gptq_kernel<8, use_r_weights, mul_r_weights>;
+        #endif
+        default: return NULL;  // no instantiation was compiled for this m_count
+        }
+    }
+};
+
+fp_gemm_half_q_half_gptq_kernel pick_gemm_half_q_half_gptq_kernel(const int m_count, bool r_weights, bool mul_r_weights)  // runtime flags + m_count -> kernel pointer, NULL if no instantiation matches
 {
-    #if BLOCK_M_SIZE_MAX >= 1
-    if (m_count == 1) return gemm_half_q_half_gptq_kernel<true, 1>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 2
-    if (m_count == 2) return gemm_half_q_half_gptq_kernel<true, 2>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 3
-    if (m_count == 3) return gemm_half_q_half_gptq_kernel<true, 3>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 4
-    if (m_count == 4) return gemm_half_q_half_gptq_kernel<true, 4>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 5
-    if (m_count == 5) return gemm_half_q_half_gptq_kernel<true, 5>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 6
-    if (m_count == 6) return gemm_half_q_half_gptq_kernel<true, 6>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 7
-    if (m_count == 7) return gemm_half_q_half_gptq_kernel<true, 7>;
-    #endif
-    #if BLOCK_M_SIZE_MAX >= 8
-    if (m_count == 8) return gemm_half_q_half_gptq_kernel<true, 8>;
-    #endif
+    if (!r_weights && !mul_r_weights) return map_m_count_gptq<false, false>::pick_gemm_half_q_half_gptq_kernel(m_count);  // row weights are neither read nor applied
+    if (!r_weights &&  mul_r_weights) return map_m_count_gptq<false,  true>::pick_gemm_half_q_half_gptq_kernel(m_count);  // NOTE(review): the kernel fills weights[] only under use_r_weights, yet mul_r_weights reads it -- confirm no caller passes this combination
+    if ( r_weights && !mul_r_weights) return map_m_count_gptq< true, false>::pick_gemm_half_q_half_gptq_kernel(m_count);  // rows whose weight is zero are skipped
+    if ( r_weights &&  mul_r_weights) return map_m_count_gptq< true,  true>::pick_gemm_half_q_half_gptq_kernel(m_count);  // zero-weight rows skipped, remaining outputs multiplied by their weight
     return NULL;
 }
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cu b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cu
index 6aed7470..f7a91e29 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cu
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cu
@@ -57,6 +57,7 @@ QMatrix::QMatrix
     uint32_t* _q_scale,
     half* _q_scale_max,
     uint16_t* _q_groups,
+    uint16_t* _q_group_map,
 
     uint32_t* _gptq_qzeros,
     half* _gptq_scales,
@@ -80,13 +81,17 @@ QMatrix::QMatrix
     cuda_q_scale = _q_scale;
     cuda_q_scale_max = _q_scale_max;
     cuda_q_groups = _q_groups;
+    cuda_q_group_map = _q_group_map;
     cuda_gptq_qzeros = _gptq_qzeros;
     cuda_gptq_scales = _gptq_scales;
 
     is_gptq = (_gptq_qzeros != NULL);
 
-    groupsize = 1;
-    while (groupsize * groups < height) groupsize *= 2;
+    if (is_gptq)
+    {
+        gptq_groupsize = 1;
+        while (gptq_groupsize * groups < height) gptq_groupsize *= 2;
+    }
 
     // Create group map
 
@@ -102,15 +107,26 @@ QMatrix::QMatrix
         uint16_t* cpu_q_groups = (uint16_t*)calloc(groups * 2, sizeof(uint16_t));
         cudaMemcpy(cpu_q_groups, cuda_q_groups, groups * 2 * sizeof(uint16_t), cudaMemcpyDeviceToHost);
 
+        int row = 0;
         for (int i = 0; i < groups; i++)
         {
             int bits = cpu_q_groups[i * 2];
-            if (bits == 8) rows_8 += groupsize;
-            if (bits == 6) rows_6 += groupsize;
-            if (bits == 5) rows_5 += groupsize;
-            if (bits == 4) rows_4 += groupsize;
-            if (bits == 3) rows_3 += groupsize;
-            if (bits == 2) rows_2 += groupsize;
+
+            int rows;
+            if (i < groups - 1)
+            {
+                int qrows = cpu_q_groups[i * 2 + 3] - cpu_q_groups[i * 2 + 1];
+                rows = qrows * 32 / bits;
+            }
+            else rows = height - row;
+
+            if (bits == 8) rows_8 += rows;
+            if (bits == 6) rows_6 += rows;
+            if (bits == 5) rows_5 += rows;
+            if (bits == 4) rows_4 += rows;
+            if (bits == 3) rows_3 += rows;
+            if (bits == 2) rows_2 += rows;
+            row += rows;
         }
 
         free(cpu_q_groups);
@@ -138,6 +154,13 @@ QMatrix::QMatrix
         }
     }
 
+//     DBGI(rows_8);
+//     DBGI(rows_6);
+//     DBGI(rows_5);
+//     DBGI(rows_4);
+//     DBGI(rows_3);
+//     DBGI(rows_2);
+
     // Shuffle quantized data
 
     dim3 blockDim, gridDim;
@@ -145,8 +168,9 @@ QMatrix::QMatrix
     blockDim.y = 1;
     gridDim.x = DIVIDE(width, THREADS_X);
     gridDim.y = 1;
+    const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
-    shuffle_kernel<<<gridDim, blockDim>>>(cuda_q_weight, height, width, rows_8, rows_6, rows_5, rows_4, rows_3, rows_2);
+    shuffle_kernel<<<gridDim, blockDim, 0, stream>>>(cuda_q_weight, height, width, rows_8, rows_6, rows_5, rows_4, rows_3, rows_2);
 }
 
 QMatrix::~QMatrix()
@@ -214,10 +238,10 @@ __global__ void reconstruct_gptq_kernel
     half2 y1y16[4][2];
     b_gptq_qzeros_.item4(zeros, group, n);
     b_gptq_scales_.item4_h2(scales, group, n);
-    dequant_4bit_8_prep_zero(zeros[0] + 1, z1z16[0], y1y16[0]);
-    dequant_4bit_8_prep_zero(zeros[1] + 1, z1z16[1], y1y16[1]);
-    dequant_4bit_8_prep_zero(zeros[2] + 1, z1z16[2], y1y16[2]);
-    dequant_4bit_8_prep_zero(zeros[3] + 1, z1z16[3], y1y16[3]);
+    dequant_4bit_8_prep_zero((zeros[0] + 1) & 0x0F, z1z16[0], y1y16[0]);
+    dequant_4bit_8_prep_zero((zeros[1] + 1) & 0x0F, z1z16[1], y1y16[1]);
+    dequant_4bit_8_prep_zero((zeros[2] + 1) & 0x0F, z1z16[2], y1y16[2]);
+    dequant_4bit_8_prep_zero((zeros[3] + 1) & 0x0F, z1z16[3], y1y16[3]);
 
     __syncthreads();
 
@@ -232,10 +256,10 @@ __global__ void reconstruct_gptq_kernel
             nextgroup += groupsize;
             b_gptq_qzeros_.item4(zeros, group, n);
             b_gptq_scales_.item4_h2(scales, group, n);
-            dequant_4bit_8_prep_zero(zeros[0] + 1, z1z16[0], y1y16[0]);
-            dequant_4bit_8_prep_zero(zeros[1] + 1, z1z16[1], y1y16[1]);
-            dequant_4bit_8_prep_zero(zeros[2] + 1, z1z16[2], y1y16[2]);
-            dequant_4bit_8_prep_zero(zeros[3] + 1, z1z16[3], y1y16[3]);
+            dequant_4bit_8_prep_zero((zeros[0] + 1) & 0x0F, z1z16[0], y1y16[0]);
+            dequant_4bit_8_prep_zero((zeros[1] + 1) & 0x0F, z1z16[1], y1y16[1]);
+            dequant_4bit_8_prep_zero((zeros[2] + 1) & 0x0F, z1z16[2], y1y16[2]);
+            dequant_4bit_8_prep_zero((zeros[3] + 1) & 0x0F, z1z16[3], y1y16[3]);
         }
 
         for (int p = 0; p < 4; p++)
@@ -283,10 +307,10 @@ __global__ void reconstruct_kernel
     const uint16_t* __restrict__ b_q_perm,
     const uint32_t* __restrict__ b_q_scale,
     const half* __restrict__ b_q_scale_max,
-    //const uint16_t* __restrict__ b_q_groups,
+    const uint16_t* __restrict__ b_q_group_map,
     const int size_k,
     const int size_n,
-    const int groupsize,
+    //const int groupsize,
     const int groups,
     half* __restrict__ b,
     const int rows_8,
@@ -317,7 +341,8 @@ __global__ void reconstruct_kernel
 
     // Find initial group
 
-    int group = offset_k / groupsize;
+    // int group = offset_k / groupsize;
+    int group = b_q_group_map[offset_k * 2];
 
     int pre_rows_8 = min(rows_8, offset_k);
     int pre_rows_6 = offset_k > rows_8 ? min(rows_6, offset_k) - rows_8 : 0;
@@ -337,7 +362,7 @@ __global__ void reconstruct_kernel
 
     half qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]);
     half2 qs_h2 = __halves2half2(qs_h, qs_h);
-    int nextgroup = offset_k + groupsize;
+    int nextgroup = offset_k + b_q_group_map[offset_k * 2 + 1];
 
     int end_k = min(offset_k + BLOCK_KN_SIZE, size_k);
     int k = offset_k;
@@ -347,7 +372,7 @@ __global__ void reconstruct_kernel
 
     while (k < rows_8 && k < end_k)
     {
-        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += groupsize; qs_h2 = __halves2half2(qs_h, qs_h); }
+        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += b_q_group_map[k * 2 + 1]; qs_h2 = __halves2half2(qs_h, qs_h); }
         for (int p = 0; p < 4; p++)
         {
             half2 dq[4];
@@ -363,7 +388,7 @@ __global__ void reconstruct_kernel
 
     while (k < rows_6 && k < end_k)
     {
-        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += groupsize; qs_h2 = __halves2half2(qs_h, qs_h); }
+        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += b_q_group_map[k * 2 + 1]; qs_h2 = __halves2half2(qs_h, qs_h); }
         for (int p = 0; p < 2; p++)
         {
             half2 dq[8];
@@ -380,7 +405,7 @@ __global__ void reconstruct_kernel
 
     while (k < rows_5 && k < end_k)
     {
-        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += groupsize; qs_h2 = __halves2half2(qs_h, qs_h); }
+        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += b_q_group_map[k * 2 + 1]; qs_h2 = __halves2half2(qs_h, qs_h); }
         for (int p = 0; p < 1; p++)
         {
             half2 dq[16];
@@ -399,7 +424,7 @@ __global__ void reconstruct_kernel
 
     while (k < rows_4 && k < end_k)
     {
-        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += groupsize; qs_h2 = __halves2half2(qs_h, qs_h); }
+        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += b_q_group_map[k * 2 + 1]; qs_h2 = __halves2half2(qs_h, qs_h); }
         for (int p = 0; p < 4; p++)
         {
             half2 dq[4];
@@ -414,7 +439,7 @@ __global__ void reconstruct_kernel
 
     while (k < rows_3 && k < end_k)
     {
-        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += groupsize; qs_h2 = __halves2half2(qs_h, qs_h); }
+        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += b_q_group_map[k * 2 + 1]; qs_h2 = __halves2half2(qs_h, qs_h); }
         for (int p = 0; p < 1; p++)
         {
             half2 dq[16];
@@ -431,8 +456,8 @@ __global__ void reconstruct_kernel
 
     while (k < rows_2 && k < end_k)
     {
-        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += groupsize; qs_h2 = __halves2half2(qs_h, qs_h); }
-        for (int p = 0; p < 2; p++)
+        if (k == nextgroup) { group++; qs_h = dq_scale(b_q_scale_.item(group, n), b_q_scale_max[group]); nextgroup += b_q_group_map[k * 2 + 1]; qs_h2 = __halves2half2(qs_h, qs_h); }
+        for (int p = 0; p < 1; p++)
         {
             half2 dq[8];
             uint32_t q_0 = *b_ptr; b_ptr += size_n;
@@ -441,7 +466,7 @@ __global__ void reconstruct_kernel
             half* dqh = (half*) dq;
             for (int j = 0; j < 16; j++) b_.set(perm[lk++], n, dqh[j]);
         }
-        k += 32;
+        k += 16;
     }
 }
 
@@ -451,20 +476,21 @@ void QMatrix::reconstruct(half* out)
     blockDim.x = BLOCK_KN_SIZE;
     blockDim.y = 1;
     gridDim.y = DIVIDE(height, BLOCK_KN_SIZE);
+    const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
     if (!is_gptq)
     {
         gridDim.x = DIVIDE(width, BLOCK_KN_SIZE);
-        reconstruct_kernel<<<gridDim, blockDim>>>
+        reconstruct_kernel<<<gridDim, blockDim, 0, stream>>>
         (
             cuda_q_weight,
             cuda_q_perm,
             cuda_q_scale,
             cuda_q_scale_max,
-            //cuda_q_groups,
+            cuda_q_group_map,
             height,
             width,
-            groupsize,
+            //groupsize,
             groups,
             out,
             rows_8,
@@ -478,7 +504,7 @@ void QMatrix::reconstruct(half* out)
     else
     {
         gridDim.x = DIVIDE(width, BLOCK_KN_SIZE * 4);
-        reconstruct_gptq_kernel<<<gridDim, blockDim>>>
+        reconstruct_gptq_kernel<<<gridDim, blockDim, 0, stream>>>
         (
             cuda_q_weight,
             cuda_q_perm,
@@ -487,7 +513,7 @@ void QMatrix::reconstruct(half* out)
             //const uint16_t* __restrict__ b_q_groups,
             height,
             width,
-            groupsize,
+            gptq_groupsize,
             groups,
             out,
             rows_4
@@ -539,6 +565,7 @@ __global__ void make_sequential_kernel
 
 bool QMatrix::make_sequential(const uint32_t* cpu_g_idx)
 {
+    const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     uint32_t* cuda_new_qweight = NULL;
     cudaError_t err = cudaMalloc(&cuda_new_qweight, height / 8 * width * sizeof(uint32_t));
     if (err != cudaSuccess) {
@@ -597,7 +624,7 @@ bool QMatrix::make_sequential(const uint32_t* cpu_g_idx)
     gridDim.x = DIVIDE(width, THREADS_X);
     gridDim.y = height / 8;
 
-    make_sequential_kernel<<<gridDim, blockDim>>>
+    make_sequential_kernel<<<gridDim, blockDim, 0, stream>>>
     (
         cuda_q_weight,
         cuda_new_qweight,
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cuh
index dda83a4f..d36b8d66 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/q_matrix.cuh
@@ -18,7 +18,7 @@ public:
     int height;
     int width;
     int groups;
-    int groupsize;
+    int gptq_groupsize;
 
     int rows_8;
     int rows_6;
@@ -33,6 +33,7 @@ public:
     uint32_t* cuda_q_scale = NULL;
     half* cuda_q_scale_max = NULL;
     uint16_t* cuda_q_groups = NULL;
+    uint16_t* cuda_q_group_map = NULL;
     uint32_t* cuda_gptq_qzeros = NULL;
     half* cuda_gptq_scales = NULL;
 
@@ -53,6 +54,7 @@ public:
         uint32_t* _q_scale,
         half* _q_scale_max,
         uint16_t* _q_groups,
+        uint16_t* _q_group_map,
 
         uint32_t* _gptq_qzeros,
         half* _gptq_scales,
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_2.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_2.cuh
index 3beaeefa..90c18a0c 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_2.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_2.cuh
@@ -100,4 +100,4 @@ __forceinline__ __device__ void dequant_2bit_16
 
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_4.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_4.cuh
index 5fb070d0..ad95edb4 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_4.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_4.cuh
@@ -224,4 +224,4 @@ __forceinline__ __device__ void dequant_4bit_8_gptq
 
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_5.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_5.cuh
index 454e4b93..78d81f92 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_5.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_5.cuh
@@ -204,4 +204,4 @@ __forceinline__ __device__ void dequant_5bit_32
 
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_6.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_6.cuh
index c2eb8cfb..562fe695 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_6.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_6.cuh
@@ -40,5 +40,3 @@ __forceinline__ __device__ void dequant_6bit_16
 #endif
 
 #endif
-
-
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_8.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_8.cuh
index e2409efa..6e6bedbd 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_8.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_8.cuh
@@ -35,4 +35,4 @@ __forceinline__ __device__ void dequant_8bit_8
 
 #endif
 
-#endif
\ No newline at end of file
+#endif
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_util.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_util.cuh
index 71657191..cac9df9c 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_util.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/quant/qdq_util.cuh
@@ -7,6 +7,7 @@ union half2_uint32
     half2 as_half2;
     __device__ half2_uint32(uint32_t val) : as_uint32(val) {}
     __device__ half2_uint32(half2 val) : as_half2(val) {}
+    __device__ half2_uint32() : as_uint32(0) {}
 };
 
 union half_uint16
@@ -15,6 +16,7 @@ union half_uint16
     half as_half;
     __device__ half_uint16(uint16_t val) : as_uint16(val) {}
     __device__ half_uint16(half val) : as_half(val) {}
+    __device__ half_uint16() : as_uint16(0) {}
 };
 
 // Max_scale premultiplied by 1/256
diff --git a/server/exllamav2_kernels/exllamav2_kernels/cuda/util.cuh b/server/exllamav2_kernels/exllamav2_kernels/cuda/util.cuh
index 06a58d18..e167bc23 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/cuda/util.cuh
+++ b/server/exllamav2_kernels/exllamav2_kernels/cuda/util.cuh
@@ -1,3 +1,11 @@
+#ifndef _util_cuh
+#define _util_cuh
+
+#include <cuda_runtime.h>
+#include <cuda_fp16.h>
+#include <cstdint>
+#include <cstdio>
+#include <ATen/cuda/CUDAContext.h>
 
 #define DIVIDE(x, size) (((x) + (size) - 1) / (size))
 
@@ -40,3 +48,7 @@ inline void gpu_assert(cudaError_t code, const char *file, int line, bool abort=
       if (abort) exit(code);
    }
 }
+
+void print_global_mem(const half* ptr, int rows, int columns, int stride);
+
+#endif
diff --git a/server/exllamav2_kernels/exllamav2_kernels/ext.cpp b/server/exllamav2_kernels/exllamav2_kernels/ext.cpp
index 5e52e6ab..ff4e1851 100644
--- a/server/exllamav2_kernels/exllamav2_kernels/ext.cpp
+++ b/server/exllamav2_kernels/exllamav2_kernels/ext.cpp
@@ -31,6 +31,7 @@ uintptr_t make_q_matrix
     torch::Tensor q_scale,
     torch::Tensor q_scale_max,
     torch::Tensor q_groups,
+    torch::Tensor q_group_map,
     torch::Tensor gptq_qzeros,
     torch::Tensor gptq_scales,
     torch::Tensor gptq_g_idx,
@@ -43,6 +44,7 @@ uintptr_t make_q_matrix
     TORCH_CHECK_DTYPE_OPT(q_scale, kInt);
     TORCH_CHECK_DTYPE_OPT(q_scale_max, kHalf);
     TORCH_CHECK_DTYPE_OPT(q_groups, kShort);
+    TORCH_CHECK_DTYPE_OPT(q_group_map, kShort);
     TORCH_CHECK_DTYPE_OPT(gptq_qzeros, kInt);
     TORCH_CHECK_DTYPE_OPT(gptq_scales, kHalf);
     TORCH_CHECK_DTYPE_OPT(gptq_g_idx, kInt);
@@ -83,12 +85,15 @@ uintptr_t make_q_matrix
         q_scale.device().is_meta() ? NULL : (uint32_t*) q_scale.data_ptr(),
         q_scale_max.device().is_meta() ? NULL : (half*) q_scale_max.data_ptr(),
         q_groups.device().is_meta() ? NULL : (uint16_t*) q_groups.data_ptr(),
+        q_group_map.device().is_meta() ? NULL : (uint16_t*) q_group_map.data_ptr(),
         gptq_qzeros.device().is_meta() ? NULL : (uint32_t*) gptq_qzeros.data_ptr(),
         gptq_scales.device().is_meta() ? NULL : (half*) gptq_scales.data_ptr(),
         gptq_g_idx.device().is_meta() ? NULL : (uint32_t*) gptq_g_idx.data_ptr(),
         (half*) temp_dq.data_ptr()
     );
 
+    if (m->failed) throw std::runtime_error("CUDA out of memory");
+
     return reinterpret_cast<uintptr_t> (m);
 }
 
diff --git a/server/exllamav2_kernels/setup.py b/server/exllamav2_kernels/setup.py
index 518db1df..4a16b546 100644
--- a/server/exllamav2_kernels/setup.py
+++ b/server/exllamav2_kernels/setup.py
@@ -1,5 +1,15 @@
 from setuptools import setup
 from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+import torch
+
+extra_cuda_cflags = ["-lineinfo", "-O3"]
+
+if torch.version.hip:
+    extra_cuda_cflags += ["-DHIPBLAS_USE_HIP_HALF"]
+
+extra_compile_args = {
+    "nvcc": extra_cuda_cflags,
+}
 
 setup(
     name="exllamav2_kernels",
@@ -11,6 +21,7 @@ setup(
                 "exllamav2_kernels/cuda/q_matrix.cu",
                 "exllamav2_kernels/cuda/q_gemm.cu",
             ],
+            extra_compile_args=extra_compile_args,
         )
     ],
     cmdclass={"build_ext": BuildExtension},
diff --git a/server/poetry.lock b/server/poetry.lock
index 400c6e54..f7d40699 100644
--- a/server/poetry.lock
+++ b/server/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
 
 [[package]]
 name = "accelerate"
@@ -32,87 +32,87 @@ testing = ["bitsandbytes", "datasets", "deepspeed (<0.13.0)", "evaluate", "param
 
 [[package]]
 name = "aiohttp"
-version = "3.9.0"
+version = "3.9.5"
 description = "Async http client/server framework (asyncio)"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "aiohttp-3.9.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6896b8416be9ada4d22cd359d7cb98955576ce863eadad5596b7cdfbf3e17c6c"},
-    {file = "aiohttp-3.9.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1736d87dad8ef46a8ec9cddd349fa9f7bd3a064c47dd6469c0d6763d3d49a4fc"},
-    {file = "aiohttp-3.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c9e5f4d7208cda1a2bb600e29069eecf857e6980d0ccc922ccf9d1372c16f4b"},
-    {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8488519aa05e636c5997719fe543c8daf19f538f4fa044f3ce94bee608817cff"},
-    {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5ab16c254e2312efeb799bc3c06897f65a133b38b69682bf75d1f1ee1a9c43a9"},
-    {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7a94bde005a8f926d0fa38b88092a03dea4b4875a61fbcd9ac6f4351df1b57cd"},
-    {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b777c9286b6c6a94f50ddb3a6e730deec327e9e2256cb08b5530db0f7d40fd8"},
-    {file = "aiohttp-3.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:571760ad7736b34d05597a1fd38cbc7d47f7b65deb722cb8e86fd827404d1f6b"},
-    {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:deac0a32aec29608eb25d730f4bc5a261a65b6c48ded1ed861d2a1852577c932"},
-    {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4ee1b4152bc3190cc40ddd6a14715e3004944263ea208229ab4c297712aa3075"},
-    {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:3607375053df58ed6f23903aa10cf3112b1240e8c799d243bbad0f7be0666986"},
-    {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:65b0a70a25456d329a5e1426702dde67be0fb7a4ead718005ba2ca582d023a94"},
-    {file = "aiohttp-3.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a2eb5311a37fe105aa35f62f75a078537e1a9e4e1d78c86ec9893a3c97d7a30"},
-    {file = "aiohttp-3.9.0-cp310-cp310-win32.whl", hash = "sha256:2cbc14a13fb6b42d344e4f27746a4b03a2cb0c1c3c5b932b0d6ad8881aa390e3"},
-    {file = "aiohttp-3.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:ac9669990e2016d644ba8ae4758688534aabde8dbbc81f9af129c3f5f01ca9cd"},
-    {file = "aiohttp-3.9.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f8e05f5163528962ce1d1806fce763ab893b1c5b7ace0a3538cd81a90622f844"},
-    {file = "aiohttp-3.9.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4afa8f71dba3a5a2e1e1282a51cba7341ae76585345c43d8f0e624882b622218"},
-    {file = "aiohttp-3.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f929f4c9b9a00f3e6cc0587abb95ab9c05681f8b14e0fe1daecfa83ea90f8318"},
-    {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28185e36a78d247c55e9fbea2332d16aefa14c5276a582ce7a896231c6b1c208"},
-    {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a486ddf57ab98b6d19ad36458b9f09e6022de0381674fe00228ca7b741aacb2f"},
-    {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70e851f596c00f40a2f00a46126c95c2e04e146015af05a9da3e4867cfc55911"},
-    {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5b7bf8fe4d39886adc34311a233a2e01bc10eb4e842220235ed1de57541a896"},
-    {file = "aiohttp-3.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c67a51ea415192c2e53e4e048c78bab82d21955b4281d297f517707dc836bf3d"},
-    {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:694df243f394629bcae2d8ed94c589a181e8ba8604159e6e45e7b22e58291113"},
-    {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3dd8119752dd30dd7bca7d4bc2a92a59be6a003e4e5c2cf7e248b89751b8f4b7"},
-    {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:eb6dfd52063186ac97b4caa25764cdbcdb4b10d97f5c5f66b0fa95052e744eb7"},
-    {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:d97c3e286d0ac9af6223bc132dc4bad6540b37c8d6c0a15fe1e70fb34f9ec411"},
-    {file = "aiohttp-3.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:816f4db40555026e4cdda604a1088577c1fb957d02f3f1292e0221353403f192"},
-    {file = "aiohttp-3.9.0-cp311-cp311-win32.whl", hash = "sha256:3abf0551874fecf95f93b58f25ef4fc9a250669a2257753f38f8f592db85ddea"},
-    {file = "aiohttp-3.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:e18d92c3e9e22553a73e33784fcb0ed484c9874e9a3e96c16a8d6a1e74a0217b"},
-    {file = "aiohttp-3.9.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:99ae01fb13a618b9942376df77a1f50c20a281390dad3c56a6ec2942e266220d"},
-    {file = "aiohttp-3.9.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:05857848da443c8c12110d99285d499b4e84d59918a21132e45c3f0804876994"},
-    {file = "aiohttp-3.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:317719d7f824eba55857fe0729363af58e27c066c731bc62cd97bc9c3d9c7ea4"},
-    {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1e3b3c107ccb0e537f309f719994a55621acd2c8fdf6d5ce5152aed788fb940"},
-    {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45820ddbb276113ead8d4907a7802adb77548087ff5465d5c554f9aa3928ae7d"},
-    {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:05a183f1978802588711aed0dea31e697d760ce9055292db9dc1604daa9a8ded"},
-    {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a4cd44788ea0b5e6bb8fa704597af3a30be75503a7ed1098bc5b8ffdf6c982"},
-    {file = "aiohttp-3.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:673343fbc0c1ac44d0d2640addc56e97a052504beacd7ade0dc5e76d3a4c16e8"},
-    {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7e8a3b79b6d186a9c99761fd4a5e8dd575a48d96021f220ac5b5fa856e5dd029"},
-    {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6777a390e41e78e7c45dab43a4a0196c55c3b8c30eebe017b152939372a83253"},
-    {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:7ae5f99a32c53731c93ac3075abd3e1e5cfbe72fc3eaac4c27c9dd64ba3b19fe"},
-    {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:f1e4f254e9c35d8965d377e065c4a8a55d396fe87c8e7e8429bcfdeeb229bfb3"},
-    {file = "aiohttp-3.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:11ca808f9a6b63485059f5f6e164ef7ec826483c1212a44f268b3653c91237d8"},
-    {file = "aiohttp-3.9.0-cp312-cp312-win32.whl", hash = "sha256:de3cc86f4ea8b4c34a6e43a7306c40c1275e52bfa9748d869c6b7d54aa6dad80"},
-    {file = "aiohttp-3.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:ca4fddf84ac7d8a7d0866664936f93318ff01ee33e32381a115b19fb5a4d1202"},
-    {file = "aiohttp-3.9.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f09960b5bb1017d16c0f9e9f7fc42160a5a49fa1e87a175fd4a2b1a1833ea0af"},
-    {file = "aiohttp-3.9.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8303531e2c17b1a494ffaeba48f2da655fe932c4e9a2626c8718403c83e5dd2b"},
-    {file = "aiohttp-3.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4790e44f46a4aa07b64504089def5744d3b6780468c4ec3a1a36eb7f2cae9814"},
-    {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1d7edf74a36de0e5ca50787e83a77cf352f5504eb0ffa3f07000a911ba353fb"},
-    {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:94697c7293199c2a2551e3e3e18438b4cba293e79c6bc2319f5fd652fccb7456"},
-    {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a1b66dbb8a7d5f50e9e2ea3804b01e766308331d0cac76eb30c563ac89c95985"},
-    {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9623cfd9e85b76b83ef88519d98326d4731f8d71869867e47a0b979ffec61c73"},
-    {file = "aiohttp-3.9.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f32c86dc967ab8c719fd229ce71917caad13cc1e8356ee997bf02c5b368799bf"},
-    {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:f50b4663c3e0262c3a361faf440761fbef60ccdde5fe8545689a4b3a3c149fb4"},
-    {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:dcf71c55ec853826cd70eadb2b6ac62ec577416442ca1e0a97ad875a1b3a0305"},
-    {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:42fe4fd9f0dfcc7be4248c162d8056f1d51a04c60e53366b0098d1267c4c9da8"},
-    {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:76a86a9989ebf82ee61e06e2bab408aec4ea367dc6da35145c3352b60a112d11"},
-    {file = "aiohttp-3.9.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f9e09a1c83521d770d170b3801eea19b89f41ccaa61d53026ed111cb6f088887"},
-    {file = "aiohttp-3.9.0-cp38-cp38-win32.whl", hash = "sha256:a00ce44c21612d185c5275c5cba4bab8d7c1590f248638b667ed8a782fa8cd6f"},
-    {file = "aiohttp-3.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:d5b9345ab92ebe6003ae11d8092ce822a0242146e6fa270889b9ba965457ca40"},
-    {file = "aiohttp-3.9.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:98d21092bf2637c5fa724a428a69e8f5955f2182bff61f8036827cf6ce1157bf"},
-    {file = "aiohttp-3.9.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:35a68cd63ca6aaef5707888f17a70c36efe62b099a4e853d33dc2e9872125be8"},
-    {file = "aiohttp-3.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3d7f6235c7475658acfc1769d968e07ab585c79f6ca438ddfecaa9a08006aee2"},
-    {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:db04d1de548f7a62d1dd7e7cdf7c22893ee168e22701895067a28a8ed51b3735"},
-    {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:536b01513d67d10baf6f71c72decdf492fb7433c5f2f133e9a9087379d4b6f31"},
-    {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c8b0a6487e8109427ccf638580865b54e2e3db4a6e0e11c02639231b41fc0f"},
-    {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7276fe0017664414fdc3618fca411630405f1aaf0cc3be69def650eb50441787"},
-    {file = "aiohttp-3.9.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:23170247ef89ffa842a02bbfdc425028574d9e010611659abeb24d890bc53bb8"},
-    {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b1a2ea8252cacc7fd51df5a56d7a2bb1986ed39be9397b51a08015727dfb69bd"},
-    {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:2d71abc15ff7047412ef26bf812dfc8d0d1020d664617f4913df2df469f26b76"},
-    {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:2d820162c8c2bdbe97d328cd4f417c955ca370027dce593345e437b2e9ffdc4d"},
-    {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:2779f5e7c70f7b421915fd47db332c81de365678180a9f3ab404088f87ba5ff9"},
-    {file = "aiohttp-3.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:366bc870d7ac61726f32a489fbe3d1d8876e87506870be66b01aeb84389e967e"},
-    {file = "aiohttp-3.9.0-cp39-cp39-win32.whl", hash = "sha256:1df43596b826022b14998f0460926ce261544fedefe0d2f653e1b20f49e96454"},
-    {file = "aiohttp-3.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:9c196b30f1b1aa3363a69dd69079ae9bec96c2965c4707eaa6914ba099fb7d4f"},
-    {file = "aiohttp-3.9.0.tar.gz", hash = "sha256:09f23292d29135025e19e8ff4f0a68df078fe4ee013bca0105b2e803989de92d"},
+    {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fcde4c397f673fdec23e6b05ebf8d4751314fa7c24f93334bf1f1364c1c69ac7"},
+    {file = "aiohttp-3.9.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5d6b3f1fabe465e819aed2c421a6743d8debbde79b6a8600739300630a01bf2c"},
+    {file = "aiohttp-3.9.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6ae79c1bc12c34082d92bf9422764f799aee4746fd7a392db46b7fd357d4a17a"},
+    {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d3ebb9e1316ec74277d19c5f482f98cc65a73ccd5430540d6d11682cd857430"},
+    {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84dabd95154f43a2ea80deffec9cb44d2e301e38a0c9d331cc4aa0166fe28ae3"},
+    {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c8a02fbeca6f63cb1f0475c799679057fc9268b77075ab7cf3f1c600e81dd46b"},
+    {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c26959ca7b75ff768e2776d8055bf9582a6267e24556bb7f7bd29e677932be72"},
+    {file = "aiohttp-3.9.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:714d4e5231fed4ba2762ed489b4aec07b2b9953cf4ee31e9871caac895a839c0"},
+    {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7a6a8354f1b62e15d48e04350f13e726fa08b62c3d7b8401c0a1314f02e3558"},
+    {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c413016880e03e69d166efb5a1a95d40f83d5a3a648d16486592c49ffb76d0db"},
+    {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ff84aeb864e0fac81f676be9f4685f0527b660f1efdc40dcede3c251ef1e867f"},
+    {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ad7f2919d7dac062f24d6f5fe95d401597fbb015a25771f85e692d043c9d7832"},
+    {file = "aiohttp-3.9.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:702e2c7c187c1a498a4e2b03155d52658fdd6fda882d3d7fbb891a5cf108bb10"},
+    {file = "aiohttp-3.9.5-cp310-cp310-win32.whl", hash = "sha256:67c3119f5ddc7261d47163ed86d760ddf0e625cd6246b4ed852e82159617b5fb"},
+    {file = "aiohttp-3.9.5-cp310-cp310-win_amd64.whl", hash = "sha256:471f0ef53ccedec9995287f02caf0c068732f026455f07db3f01a46e49d76bbb"},
+    {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ae53e33ee7476dd3d1132f932eeb39bf6125083820049d06edcdca4381f342"},
+    {file = "aiohttp-3.9.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c088c4d70d21f8ca5c0b8b5403fe84a7bc8e024161febdd4ef04575ef35d474d"},
+    {file = "aiohttp-3.9.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:639d0042b7670222f33b0028de6b4e2fad6451462ce7df2af8aee37dcac55424"},
+    {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f26383adb94da5e7fb388d441bf09c61e5e35f455a3217bfd790c6b6bc64b2ee"},
+    {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:66331d00fb28dc90aa606d9a54304af76b335ae204d1836f65797d6fe27f1ca2"},
+    {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ff550491f5492ab5ed3533e76b8567f4b37bd2995e780a1f46bca2024223233"},
+    {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f22eb3a6c1080d862befa0a89c380b4dafce29dc6cd56083f630073d102eb595"},
+    {file = "aiohttp-3.9.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a81b1143d42b66ffc40a441379387076243ef7b51019204fd3ec36b9f69e77d6"},
+    {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f64fd07515dad67f24b6ea4a66ae2876c01031de91c93075b8093f07c0a2d93d"},
+    {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:93e22add827447d2e26d67c9ac0161756007f152fdc5210277d00a85f6c92323"},
+    {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:55b39c8684a46e56ef8c8d24faf02de4a2b2ac60d26cee93bc595651ff545de9"},
+    {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4715a9b778f4293b9f8ae7a0a7cef9829f02ff8d6277a39d7f40565c737d3771"},
+    {file = "aiohttp-3.9.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:afc52b8d969eff14e069a710057d15ab9ac17cd4b6753042c407dcea0e40bf75"},
+    {file = "aiohttp-3.9.5-cp311-cp311-win32.whl", hash = "sha256:b3df71da99c98534be076196791adca8819761f0bf6e08e07fd7da25127150d6"},
+    {file = "aiohttp-3.9.5-cp311-cp311-win_amd64.whl", hash = "sha256:88e311d98cc0bf45b62fc46c66753a83445f5ab20038bcc1b8a1cc05666f428a"},
+    {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:c7a4b7a6cf5b6eb11e109a9755fd4fda7d57395f8c575e166d363b9fc3ec4678"},
+    {file = "aiohttp-3.9.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:0a158704edf0abcac8ac371fbb54044f3270bdbc93e254a82b6c82be1ef08f3c"},
+    {file = "aiohttp-3.9.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d153f652a687a8e95ad367a86a61e8d53d528b0530ef382ec5aaf533140ed00f"},
+    {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82a6a97d9771cb48ae16979c3a3a9a18b600a8505b1115cfe354dfb2054468b4"},
+    {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60cdbd56f4cad9f69c35eaac0fbbdf1f77b0ff9456cebd4902f3dd1cf096464c"},
+    {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8676e8fd73141ded15ea586de0b7cda1542960a7b9ad89b2b06428e97125d4fa"},
+    {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da00da442a0e31f1c69d26d224e1efd3a1ca5bcbf210978a2ca7426dfcae9f58"},
+    {file = "aiohttp-3.9.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:18f634d540dd099c262e9f887c8bbacc959847cfe5da7a0e2e1cf3f14dbf2daf"},
+    {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:320e8618eda64e19d11bdb3bd04ccc0a816c17eaecb7e4945d01deee2a22f95f"},
+    {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:2faa61a904b83142747fc6a6d7ad8fccff898c849123030f8e75d5d967fd4a81"},
+    {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:8c64a6dc3fe5db7b1b4d2b5cb84c4f677768bdc340611eca673afb7cf416ef5a"},
+    {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:393c7aba2b55559ef7ab791c94b44f7482a07bf7640d17b341b79081f5e5cd1a"},
+    {file = "aiohttp-3.9.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c671dc117c2c21a1ca10c116cfcd6e3e44da7fcde37bf83b2be485ab377b25da"},
+    {file = "aiohttp-3.9.5-cp312-cp312-win32.whl", hash = "sha256:5a7ee16aab26e76add4afc45e8f8206c95d1d75540f1039b84a03c3b3800dd59"},
+    {file = "aiohttp-3.9.5-cp312-cp312-win_amd64.whl", hash = "sha256:5ca51eadbd67045396bc92a4345d1790b7301c14d1848feaac1d6a6c9289e888"},
+    {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:694d828b5c41255e54bc2dddb51a9f5150b4eefa9886e38b52605a05d96566e8"},
+    {file = "aiohttp-3.9.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:0605cc2c0088fcaae79f01c913a38611ad09ba68ff482402d3410bf59039bfb8"},
+    {file = "aiohttp-3.9.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4558e5012ee03d2638c681e156461d37b7a113fe13970d438d95d10173d25f78"},
+    {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dbc053ac75ccc63dc3a3cc547b98c7258ec35a215a92bd9f983e0aac95d3d5b"},
+    {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4109adee842b90671f1b689901b948f347325045c15f46b39797ae1bf17019de"},
+    {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a6ea1a5b409a85477fd8e5ee6ad8f0e40bf2844c270955e09360418cfd09abac"},
+    {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3c2890ca8c59ee683fd09adf32321a40fe1cf164e3387799efb2acebf090c11"},
+    {file = "aiohttp-3.9.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3916c8692dbd9d55c523374a3b8213e628424d19116ac4308e434dbf6d95bbdd"},
+    {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8d1964eb7617907c792ca00b341b5ec3e01ae8c280825deadbbd678447b127e1"},
+    {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:d5ab8e1f6bee051a4bf6195e38a5c13e5e161cb7bad83d8854524798bd9fcd6e"},
+    {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:52c27110f3862a1afbcb2af4281fc9fdc40327fa286c4625dfee247c3ba90156"},
+    {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:7f64cbd44443e80094309875d4f9c71d0401e966d191c3d469cde4642bc2e031"},
+    {file = "aiohttp-3.9.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b4f72fbb66279624bfe83fd5eb6aea0022dad8eec62b71e7bf63ee1caadeafe"},
+    {file = "aiohttp-3.9.5-cp38-cp38-win32.whl", hash = "sha256:6380c039ec52866c06d69b5c7aad5478b24ed11696f0e72f6b807cfb261453da"},
+    {file = "aiohttp-3.9.5-cp38-cp38-win_amd64.whl", hash = "sha256:da22dab31d7180f8c3ac7c7635f3bcd53808f374f6aa333fe0b0b9e14b01f91a"},
+    {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1732102949ff6087589408d76cd6dea656b93c896b011ecafff418c9661dc4ed"},
+    {file = "aiohttp-3.9.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c6021d296318cb6f9414b48e6a439a7f5d1f665464da507e8ff640848ee2a58a"},
+    {file = "aiohttp-3.9.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:239f975589a944eeb1bad26b8b140a59a3a320067fb3cd10b75c3092405a1372"},
+    {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b7b30258348082826d274504fbc7c849959f1989d86c29bc355107accec6cfb"},
+    {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cd2adf5c87ff6d8b277814a28a535b59e20bfea40a101db6b3bdca7e9926bc24"},
+    {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a3d838441bebcf5cf442700e3963f58b5c33f015341f9ea86dcd7d503c07e2"},
+    {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e3a1ae66e3d0c17cf65c08968a5ee3180c5a95920ec2731f53343fac9bad106"},
+    {file = "aiohttp-3.9.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9c69e77370cce2d6df5d12b4e12bdcca60c47ba13d1cbbc8645dd005a20b738b"},
+    {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0cbf56238f4bbf49dab8c2dc2e6b1b68502b1e88d335bea59b3f5b9f4c001475"},
+    {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d1469f228cd9ffddd396d9948b8c9cd8022b6d1bf1e40c6f25b0fb90b4f893ed"},
+    {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:45731330e754f5811c314901cebdf19dd776a44b31927fa4b4dbecab9e457b0c"},
+    {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3fcb4046d2904378e3aeea1df51f697b0467f2aac55d232c87ba162709478c46"},
+    {file = "aiohttp-3.9.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8cf142aa6c1a751fcb364158fd710b8a9be874b81889c2bd13aa8893197455e2"},
+    {file = "aiohttp-3.9.5-cp39-cp39-win32.whl", hash = "sha256:7b179eea70833c8dee51ec42f3b4097bd6370892fa93f510f76762105568cf09"},
+    {file = "aiohttp-3.9.5-cp39-cp39-win_amd64.whl", hash = "sha256:38d80498e2e169bc61418ff36170e0aad0cd268da8b38a17c4cf29d254a8b3f1"},
+    {file = "aiohttp-3.9.5.tar.gz", hash = "sha256:edea7d15772ceeb29db4aff55e482d4bcfb6ae160ce144f2682de02f6d693551"},
 ]
 
 [package.dependencies]
@@ -140,6 +140,17 @@ files = [
 [package.dependencies]
 frozenlist = ">=1.1.0"
 
+[[package]]
+name = "annotated-types"
+version = "0.6.0"
+description = "Reusable constraint types to use with typing.Annotated"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "annotated_types-0.6.0-py3-none-any.whl", hash = "sha256:0641064de18ba7a25dee8f96403ebc39113d0cb953a01429249d5c7564666a43"},
+    {file = "annotated_types-0.6.0.tar.gz", hash = "sha256:563339e807e53ffd9c267e99fc6d9ea23eb8443c08f112651963e24e22f84a5d"},
+]
+
 [[package]]
 name = "async-timeout"
 version = "4.0.3"
@@ -153,21 +164,22 @@ files = [
 
 [[package]]
 name = "attrs"
-version = "23.1.0"
+version = "23.2.0"
 description = "Classes Without Boilerplate"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"},
-    {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"},
+    {file = "attrs-23.2.0-py3-none-any.whl", hash = "sha256:99b87a485a5820b23b879f04c2305b44b951b502fd64be915879d77a7e8fc6f1"},
+    {file = "attrs-23.2.0.tar.gz", hash = "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30"},
 ]
 
 [package.extras]
 cov = ["attrs[tests]", "coverage[toml] (>=5.3)"]
-dev = ["attrs[docs,tests]", "pre-commit"]
+dev = ["attrs[tests]", "pre-commit"]
 docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"]
 tests = ["attrs[tests-no-zope]", "zope-interface"]
-tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"]
+tests-mypy = ["mypy (>=1.6)", "pytest-mypy-plugins"]
+tests-no-zope = ["attrs[tests-mypy]", "cloudpickle", "hypothesis", "pympler", "pytest (>=4.3.0)", "pytest-xdist[psutil]"]
 
 [[package]]
 name = "backoff"
@@ -182,13 +194,13 @@ files = [
 
 [[package]]
 name = "certifi"
-version = "2023.11.17"
+version = "2024.2.2"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.6"
 files = [
-    {file = "certifi-2023.11.17-py3-none-any.whl", hash = "sha256:e036ab49d5b79556f99cfc2d9320b34cfbe5be05c5871b51de9329f0603b0474"},
-    {file = "certifi-2023.11.17.tar.gz", hash = "sha256:9b469f3a900bf28dc19b8cfbf8019bf47f7fdd1a65a1d4ffb98fc14166beb4d1"},
+    {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"},
+    {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"},
 ]
 
 [[package]]
@@ -304,6 +316,17 @@ files = [
 [package.dependencies]
 colorama = {version = "*", markers = "platform_system == \"Windows\""}
 
+[[package]]
+name = "cloudpickle"
+version = "3.0.0"
+description = "Pickler class to extend the standard pickle.Pickler functionality"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "cloudpickle-3.0.0-py3-none-any.whl", hash = "sha256:246ee7d0c295602a036e86369c77fecda4ab17b506496730f2f576d9016fd9c7"},
+    {file = "cloudpickle-3.0.0.tar.gz", hash = "sha256:996d9a482c6fb4f33c1a35335cf8afd065d2a56e973270364840712d9131a882"},
+]
+
 [[package]]
 name = "colorama"
 version = "0.4.6"
@@ -334,25 +357,26 @@ cron = ["capturer (>=2.4)"]
 
 [[package]]
 name = "datasets"
-version = "2.14.7"
+version = "2.19.0"
 description = "HuggingFace community-driven open-source library of datasets"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "datasets-2.14.7-py3-none-any.whl", hash = "sha256:1a64041a7da4f4130f736fc371c1f528b8ddd208cebe156400f65719bdbba79d"},
-    {file = "datasets-2.14.7.tar.gz", hash = "sha256:394cf9b4ec0694b25945977b16ad5d18d5c15fb0e94141713eb8ead7452caf9e"},
+    {file = "datasets-2.19.0-py3-none-any.whl", hash = "sha256:f57c5316e123d4721b970c68c1cb856505f289cda58f5557ffe745b49c011a8e"},
+    {file = "datasets-2.19.0.tar.gz", hash = "sha256:0b47e08cc7af2c6800a42cadc4657b22a0afc7197786c8986d703c08d90886a6"},
 ]
 
 [package.dependencies]
 aiohttp = "*"
-dill = ">=0.3.0,<0.3.8"
-fsspec = {version = ">=2023.1.0,<=2023.10.0", extras = ["http"]}
-huggingface-hub = ">=0.14.0,<1.0.0"
+dill = ">=0.3.0,<0.3.9"
+filelock = "*"
+fsspec = {version = ">=2023.1.0,<=2024.3.1", extras = ["http"]}
+huggingface-hub = ">=0.21.2"
 multiprocess = "*"
 numpy = ">=1.17"
 packaging = "*"
 pandas = "*"
-pyarrow = ">=8.0.0"
+pyarrow = ">=12.0.0"
 pyarrow-hotfix = "*"
 pyyaml = ">=5.1"
 requests = ">=2.19.0"
@@ -360,18 +384,18 @@ tqdm = ">=4.62.1"
 xxhash = "*"
 
 [package.extras]
-apache-beam = ["apache-beam (>=2.26.0,<2.44.0)"]
+apache-beam = ["apache-beam (>=2.26.0)"]
 audio = ["librosa", "soundfile (>=0.12.1)"]
 benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
-dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "black (>=23.1,<24.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "pyyaml (>=5.3.1)", "rarfile (>=4.0)", "ruff (>=0.0.241)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"]
-docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"]
-jax = ["jax (>=0.2.8,!=0.3.2,<=0.3.25)", "jaxlib (>=0.1.65,<=0.3.25)"]
+dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
+docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"]
+jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"]
 metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"]
-quality = ["black (>=23.1,<24.0)", "pyyaml (>=5.3.1)", "ruff (>=0.0.241)"]
+quality = ["ruff (>=0.3.0)"]
 s3 = ["s3fs"]
-tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"]
-tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"]
-tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"]
+tensorflow = ["tensorflow (>=2.6.0)"]
+tensorflow-gpu = ["tensorflow (>=2.6.0)"]
+tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers", "typing-extensions (>=4.6.1)", "zstandard"]
 torch = ["torch"]
 vision = ["Pillow (>=6.2.1)"]
 
@@ -424,27 +448,39 @@ training = ["Jinja2", "accelerate (>=0.11.0)", "datasets", "peft (>=0.6.0)", "pr
 
 [[package]]
 name = "dill"
-version = "0.3.7"
+version = "0.3.8"
 description = "serialize all of Python"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"},
-    {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"},
+    {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"},
+    {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"},
 ]
 
 [package.extras]
 graph = ["objgraph (>=1.7.2)"]
+profile = ["gprof2dot (>=2022.7.29)"]
+
+[[package]]
+name = "diskcache"
+version = "5.6.3"
+description = "Disk Cache -- Disk and file backed persistent cache."
+optional = true
+python-versions = ">=3"
+files = [
+    {file = "diskcache-5.6.3-py3-none-any.whl", hash = "sha256:5e31b2d5fbad117cc363ebaf6b689474db18a1f6438bc82358b024abd4c2ca19"},
+    {file = "diskcache-5.6.3.tar.gz", hash = "sha256:2c3a3fa2743d8535d832ec61c2054a1641f41775aa7c556758a109941e33e4fc"},
+]
 
 [[package]]
 name = "exceptiongroup"
-version = "1.2.0"
+version = "1.2.1"
 description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"},
-    {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"},
+    {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"},
+    {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"},
 ]
 
 [package.extras]
@@ -452,104 +488,119 @@ test = ["pytest (>=6)"]
 
 [[package]]
 name = "filelock"
-version = "3.13.1"
+version = "3.13.4"
 description = "A platform independent file lock."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "filelock-3.13.1-py3-none-any.whl", hash = "sha256:57dbda9b35157b05fb3e58ee91448612eb674172fab98ee235ccb0b5bee19a1c"},
-    {file = "filelock-3.13.1.tar.gz", hash = "sha256:521f5f56c50f8426f5e03ad3b281b490a87ef15bc6c526f168290f0c7148d44e"},
+    {file = "filelock-3.13.4-py3-none-any.whl", hash = "sha256:404e5e9253aa60ad457cae1be07c0f0ca90a63931200a47d9b6a6af84fd7b45f"},
+    {file = "filelock-3.13.4.tar.gz", hash = "sha256:d13f466618bfde72bd2c18255e269f72542c6e70e7bac83a0232d6b1cc5c8cf4"},
 ]
 
 [package.extras]
-docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.24)"]
-testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"]
+docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"]
+testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"]
 typing = ["typing-extensions (>=4.8)"]
 
 [[package]]
 name = "frozenlist"
-version = "1.4.0"
+version = "1.4.1"
 description = "A list-like structure which implements collections.abc.MutableSequence"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"},
-    {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"},
-    {file = "frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"},
-    {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"},
-    {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"},
-    {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"},
-    {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"},
-    {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"},
-    {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"},
-    {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"},
-    {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"},
-    {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"},
-    {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"},
-    {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"},
-    {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"},
-    {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"},
-    {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"},
-    {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"},
-    {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"},
-    {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"},
-    {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"},
-    {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"},
-    {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"},
-    {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"},
-    {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"},
-    {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"},
-    {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"},
-    {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"},
-    {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"},
+    {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f9aa1878d1083b276b0196f2dfbe00c9b7e752475ed3b682025ff20c1c1f51ac"},
+    {file = "frozenlist-1.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:29acab3f66f0f24674b7dc4736477bcd4bc3ad4b896f5f45379a67bce8b96868"},
+    {file = "frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:74fb4bee6880b529a0c6560885fce4dc95936920f9f20f53d99a213f7bf66776"},
+    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:590344787a90ae57d62511dd7c736ed56b428f04cd8c161fcc5e7232c130c69a"},
+    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:068b63f23b17df8569b7fdca5517edef76171cf3897eb68beb01341131fbd2ad"},
+    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c849d495bf5154cd8da18a9eb15db127d4dba2968d88831aff6f0331ea9bd4c"},
+    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9750cc7fe1ae3b1611bb8cfc3f9ec11d532244235d75901fb6b8e42ce9229dfe"},
+    {file = "frozenlist-1.4.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b2de4cf0cdd5bd2dee4c4f63a653c61d2408055ab77b151c1957f221cabf2a"},
+    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0633c8d5337cb5c77acbccc6357ac49a1770b8c487e5b3505c57b949b4b82e98"},
+    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:27657df69e8801be6c3638054e202a135c7f299267f1a55ed3a598934f6c0d75"},
+    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:f9a3ea26252bd92f570600098783d1371354d89d5f6b7dfd87359d669f2109b5"},
+    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:4f57dab5fe3407b6c0c1cc907ac98e8a189f9e418f3b6e54d65a718aaafe3950"},
+    {file = "frozenlist-1.4.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e02a0e11cf6597299b9f3bbd3f93d79217cb90cfd1411aec33848b13f5c656cc"},
+    {file = "frozenlist-1.4.1-cp310-cp310-win32.whl", hash = "sha256:a828c57f00f729620a442881cc60e57cfcec6842ba38e1b19fd3e47ac0ff8dc1"},
+    {file = "frozenlist-1.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:f56e2333dda1fe0f909e7cc59f021eba0d2307bc6f012a1ccf2beca6ba362439"},
+    {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:a0cb6f11204443f27a1628b0e460f37fb30f624be6051d490fa7d7e26d4af3d0"},
+    {file = "frozenlist-1.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b46c8ae3a8f1f41a0d2ef350c0b6e65822d80772fe46b653ab6b6274f61d4a49"},
+    {file = "frozenlist-1.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fde5bd59ab5357e3853313127f4d3565fc7dad314a74d7b5d43c22c6a5ed2ced"},
+    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e1124aec435320ae01ee3ac7bec11a5d47f25d0ed6328f2273d287bc3abb0"},
+    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2471c201b70d58a0f0c1f91261542a03d9a5e088ed3dc6c160d614c01649c106"},
+    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c757a9dd70d72b076d6f68efdbb9bc943665ae954dad2801b874c8c69e185068"},
+    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f146e0911cb2f1da549fc58fc7bcd2b836a44b79ef871980d605ec392ff6b0d2"},
+    {file = "frozenlist-1.4.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9c515e7914626b2a2e1e311794b4c35720a0be87af52b79ff8e1429fc25f19"},
+    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c302220494f5c1ebeb0912ea782bcd5e2f8308037b3c7553fad0e48ebad6ad82"},
+    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:442acde1e068288a4ba7acfe05f5f343e19fac87bfc96d89eb886b0363e977ec"},
+    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:1b280e6507ea8a4fa0c0a7150b4e526a8d113989e28eaaef946cc77ffd7efc0a"},
+    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:fe1a06da377e3a1062ae5fe0926e12b84eceb8a50b350ddca72dc85015873f74"},
+    {file = "frozenlist-1.4.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:db9e724bebd621d9beca794f2a4ff1d26eed5965b004a97f1f1685a173b869c2"},
+    {file = "frozenlist-1.4.1-cp311-cp311-win32.whl", hash = "sha256:e774d53b1a477a67838a904131c4b0eef6b3d8a651f8b138b04f748fccfefe17"},
+    {file = "frozenlist-1.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:fb3c2db03683b5767dedb5769b8a40ebb47d6f7f45b1b3e3b4b51ec8ad9d9825"},
+    {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:1979bc0aeb89b33b588c51c54ab0161791149f2461ea7c7c946d95d5f93b56ae"},
+    {file = "frozenlist-1.4.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cc7b01b3754ea68a62bd77ce6020afaffb44a590c2289089289363472d13aedb"},
+    {file = "frozenlist-1.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c9c92be9fd329ac801cc420e08452b70e7aeab94ea4233a4804f0915c14eba9b"},
+    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c3894db91f5a489fc8fa6a9991820f368f0b3cbdb9cd8849547ccfab3392d86"},
+    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba60bb19387e13597fb059f32cd4d59445d7b18b69a745b8f8e5db0346f33480"},
+    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8aefbba5f69d42246543407ed2461db31006b0f76c4e32dfd6f42215a2c41d09"},
+    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:780d3a35680ced9ce682fbcf4cb9c2bad3136eeff760ab33707b71db84664e3a"},
+    {file = "frozenlist-1.4.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9acbb16f06fe7f52f441bb6f413ebae6c37baa6ef9edd49cdd567216da8600cd"},
+    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:23b701e65c7b36e4bf15546a89279bd4d8675faabc287d06bbcfac7d3c33e1e6"},
+    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:3e0153a805a98f5ada7e09826255ba99fb4f7524bb81bf6b47fb702666484ae1"},
+    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:dd9b1baec094d91bf36ec729445f7769d0d0cf6b64d04d86e45baf89e2b9059b"},
+    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:1a4471094e146b6790f61b98616ab8e44f72661879cc63fa1049d13ef711e71e"},
+    {file = "frozenlist-1.4.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5667ed53d68d91920defdf4035d1cdaa3c3121dc0b113255124bcfada1cfa1b8"},
+    {file = "frozenlist-1.4.1-cp312-cp312-win32.whl", hash = "sha256:beee944ae828747fd7cb216a70f120767fc9f4f00bacae8543c14a6831673f89"},
+    {file = "frozenlist-1.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:64536573d0a2cb6e625cf309984e2d873979709f2cf22839bf2d61790b448ad5"},
+    {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:20b51fa3f588ff2fe658663db52a41a4f7aa6c04f6201449c6c7c476bd255c0d"},
+    {file = "frozenlist-1.4.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410478a0c562d1a5bcc2f7ea448359fcb050ed48b3c6f6f4f18c313a9bdb1826"},
+    {file = "frozenlist-1.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c6321c9efe29975232da3bd0af0ad216800a47e93d763ce64f291917a381b8eb"},
+    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:48f6a4533887e189dae092f1cf981f2e3885175f7a0f33c91fb5b7b682b6bab6"},
+    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6eb73fa5426ea69ee0e012fb59cdc76a15b1283d6e32e4f8dc4482ec67d1194d"},
+    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbeb989b5cc29e8daf7f976b421c220f1b8c731cbf22b9130d8815418ea45887"},
+    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:32453c1de775c889eb4e22f1197fe3bdfe457d16476ea407472b9442e6295f7a"},
+    {file = "frozenlist-1.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:693945278a31f2086d9bf3df0fe8254bbeaef1fe71e1351c3bd730aa7d31c41b"},
+    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:1d0ce09d36d53bbbe566fe296965b23b961764c0bcf3ce2fa45f463745c04701"},
+    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3a670dc61eb0d0eb7080890c13de3066790f9049b47b0de04007090807c776b0"},
+    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:dca69045298ce5c11fd539682cff879cc1e664c245d1c64da929813e54241d11"},
+    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a06339f38e9ed3a64e4c4e43aec7f59084033647f908e4259d279a52d3757d09"},
+    {file = "frozenlist-1.4.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b7f2f9f912dca3934c1baec2e4585a674ef16fe00218d833856408c48d5beee7"},
+    {file = "frozenlist-1.4.1-cp38-cp38-win32.whl", hash = "sha256:e7004be74cbb7d9f34553a5ce5fb08be14fb33bc86f332fb71cbe5216362a497"},
+    {file = "frozenlist-1.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:5a7d70357e7cee13f470c7883a063aae5fe209a493c57d86eb7f5a6f910fae09"},
+    {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:bfa4a17e17ce9abf47a74ae02f32d014c5e9404b6d9ac7f729e01562bbee601e"},
+    {file = "frozenlist-1.4.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b7e3ed87d4138356775346e6845cccbe66cd9e207f3cd11d2f0b9fd13681359d"},
+    {file = "frozenlist-1.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c99169d4ff810155ca50b4da3b075cbde79752443117d89429595c2e8e37fed8"},
+    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edb678da49d9f72c9f6c609fbe41a5dfb9a9282f9e6a2253d5a91e0fc382d7c0"},
+    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6db4667b187a6742b33afbbaf05a7bc551ffcf1ced0000a571aedbb4aa42fc7b"},
+    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55fdc093b5a3cb41d420884cdaf37a1e74c3c37a31f46e66286d9145d2063bd0"},
+    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82e8211d69a4f4bc360ea22cd6555f8e61a1bd211d1d5d39d3d228b48c83a897"},
+    {file = "frozenlist-1.4.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89aa2c2eeb20957be2d950b85974b30a01a762f3308cd02bb15e1ad632e22dc7"},
+    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9d3e0c25a2350080e9319724dede4f31f43a6c9779be48021a7f4ebde8b2d742"},
+    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7268252af60904bf52c26173cbadc3a071cece75f873705419c8681f24d3edea"},
+    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:0c250a29735d4f15321007fb02865f0e6b6a41a6b88f1f523ca1596ab5f50bd5"},
+    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:96ec70beabbd3b10e8bfe52616a13561e58fe84c0101dd031dc78f250d5128b9"},
+    {file = "frozenlist-1.4.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:23b2d7679b73fe0e5a4560b672a39f98dfc6f60df63823b0a9970525325b95f6"},
+    {file = "frozenlist-1.4.1-cp39-cp39-win32.whl", hash = "sha256:a7496bfe1da7fb1a4e1cc23bb67c58fab69311cc7d32b5a99c2007b4b2a0e932"},
+    {file = "frozenlist-1.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:e6a20a581f9ce92d389a8c7d7c3dd47c81fd5d6e655c8dddf341e14aa48659d0"},
+    {file = "frozenlist-1.4.1-py3-none-any.whl", hash = "sha256:04ced3e6a46b4cfffe20f9ae482818e34eba9b5fb0ce4056e4cc9b6e212d09b7"},
+    {file = "frozenlist-1.4.1.tar.gz", hash = "sha256:c037a86e8513059a2613aaba4d817bb90b9d9b6b69aace3ce9c877e8c8ed402b"},
 ]
 
 [[package]]
 name = "fsspec"
-version = "2023.10.0"
+version = "2024.3.1"
 description = "File-system specification"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "fsspec-2023.10.0-py3-none-any.whl", hash = "sha256:346a8f024efeb749d2a5fca7ba8854474b1ff9af7c3faaf636a4548781136529"},
-    {file = "fsspec-2023.10.0.tar.gz", hash = "sha256:330c66757591df346ad3091a53bd907e15348c2ba17d63fd54f5c39c4457d2a5"},
+    {file = "fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512"},
+    {file = "fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9"},
 ]
 
 [package.dependencies]
 aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""}
-requests = {version = "*", optional = true, markers = "extra == \"http\""}
 
 [package.extras]
 abfs = ["adlfs"]
@@ -566,7 +617,7 @@ github = ["requests"]
 gs = ["gcsfs"]
 gui = ["panel"]
 hdfs = ["pyarrow (>=1)"]
-http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"]
+http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)"]
 libarchive = ["libarchive-c"]
 oci = ["ocifs"]
 s3 = ["s3fs"]
@@ -577,13 +628,13 @@ tqdm = ["tqdm"]
 
 [[package]]
 name = "googleapis-common-protos"
-version = "1.61.0"
+version = "1.63.0"
 description = "Common protobufs used in Google APIs"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "googleapis-common-protos-1.61.0.tar.gz", hash = "sha256:8a64866a97f6304a7179873a465d6eee97b7a24ec6cfd78e0f575e96b821240b"},
-    {file = "googleapis_common_protos-1.61.0-py2.py3-none-any.whl", hash = "sha256:22f1915393bb3245343f6efe87f6fe868532efc12aa26b391b15132e1279f1c0"},
+    {file = "googleapis-common-protos-1.63.0.tar.gz", hash = "sha256:17ad01b11d5f1d0171c06d3ba5c04c54474e883b66b949722b4938ee2694ef4e"},
+    {file = "googleapis_common_protos-1.63.0-py2.py3-none-any.whl", hash = "sha256:ae45f75702f7c08b541f750854a678bd8f534a1a6bace6afe975f1d0a82d6632"},
 ]
 
 [package.dependencies]
@@ -611,69 +662,69 @@ testing = ["protobuf (>=4.21.9)"]
 
 [[package]]
 name = "grpcio"
-version = "1.59.3"
+version = "1.62.2"
 description = "HTTP/2-based RPC framework"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "grpcio-1.59.3-cp310-cp310-linux_armv7l.whl", hash = "sha256:aca028a6c7806e5b61e5f9f4232432c52856f7fcb98e330b20b6bc95d657bdcc"},
-    {file = "grpcio-1.59.3-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:19ad26a7967f7999c8960d2b9fe382dae74c55b0c508c613a6c2ba21cddf2354"},
-    {file = "grpcio-1.59.3-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:72b71dad2a3d1650e69ad42a5c4edbc59ee017f08c32c95694172bc501def23c"},
-    {file = "grpcio-1.59.3-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c0f0a11d82d0253656cc42e04b6a149521e02e755fe2e4edd21123de610fd1d4"},
-    {file = "grpcio-1.59.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60cddafb70f9a2c81ba251b53b4007e07cca7389e704f86266e22c4bffd8bf1d"},
-    {file = "grpcio-1.59.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6c75a1fa0e677c1d2b6d4196ad395a5c381dfb8385f07ed034ef667cdcdbcc25"},
-    {file = "grpcio-1.59.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:e1d8e01438d5964a11167eec1edb5f85ed8e475648f36c834ed5db4ffba24ac8"},
-    {file = "grpcio-1.59.3-cp310-cp310-win32.whl", hash = "sha256:c4b0076f0bf29ee62335b055a9599f52000b7941f577daa001c7ef961a1fbeab"},
-    {file = "grpcio-1.59.3-cp310-cp310-win_amd64.whl", hash = "sha256:b1f00a3e6e0c3dccccffb5579fc76ebfe4eb40405ba308505b41ef92f747746a"},
-    {file = "grpcio-1.59.3-cp311-cp311-linux_armv7l.whl", hash = "sha256:3996aaa21231451161dc29df6a43fcaa8b332042b6150482c119a678d007dd86"},
-    {file = "grpcio-1.59.3-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:cb4e9cbd9b7388fcb06412da9f188c7803742d06d6f626304eb838d1707ec7e3"},
-    {file = "grpcio-1.59.3-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:8022ca303d6c694a0d7acfb2b472add920217618d3a99eb4b14edc7c6a7e8fcf"},
-    {file = "grpcio-1.59.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b36683fad5664283755a7f4e2e804e243633634e93cd798a46247b8e54e3cb0d"},
-    {file = "grpcio-1.59.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8239b853226e4824e769517e1b5232e7c4dda3815b200534500338960fcc6118"},
-    {file = "grpcio-1.59.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0511af8653fbda489ff11d542a08505d56023e63cafbda60e6e00d4e0bae86ea"},
-    {file = "grpcio-1.59.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e78dc982bda74cef2ddfce1c91d29b96864c4c680c634e279ed204d51e227473"},
-    {file = "grpcio-1.59.3-cp311-cp311-win32.whl", hash = "sha256:6a5c3a96405966c023e139c3bcccb2c7c776a6f256ac6d70f8558c9041bdccc3"},
-    {file = "grpcio-1.59.3-cp311-cp311-win_amd64.whl", hash = "sha256:ed26826ee423b11477297b187371cdf4fa1eca874eb1156422ef3c9a60590dd9"},
-    {file = "grpcio-1.59.3-cp312-cp312-linux_armv7l.whl", hash = "sha256:45dddc5cb5227d30fa43652d8872dc87f086d81ab4b500be99413bad0ae198d7"},
-    {file = "grpcio-1.59.3-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:1736496d74682e53dd0907fd515f2694d8e6a96c9a359b4080b2504bf2b2d91b"},
-    {file = "grpcio-1.59.3-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:ddbd1a16138e52e66229047624de364f88a948a4d92ba20e4e25ad7d22eef025"},
-    {file = "grpcio-1.59.3-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fcfa56f8d031ffda902c258c84c4b88707f3a4be4827b4e3ab8ec7c24676320d"},
-    {file = "grpcio-1.59.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2eb8f0c7c0c62f7a547ad7a91ba627a5aa32a5ae8d930783f7ee61680d7eb8d"},
-    {file = "grpcio-1.59.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8d993399cc65e3a34f8fd48dd9ad7a376734564b822e0160dd18b3d00c1a33f9"},
-    {file = "grpcio-1.59.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:c0bd141f4f41907eb90bda74d969c3cb21c1c62779419782a5b3f5e4b5835718"},
-    {file = "grpcio-1.59.3-cp312-cp312-win32.whl", hash = "sha256:33b8fd65d4e97efa62baec6171ce51f9cf68f3a8ba9f866f4abc9d62b5c97b79"},
-    {file = "grpcio-1.59.3-cp312-cp312-win_amd64.whl", hash = "sha256:0e735ed002f50d4f3cb9ecfe8ac82403f5d842d274c92d99db64cfc998515e07"},
-    {file = "grpcio-1.59.3-cp37-cp37m-linux_armv7l.whl", hash = "sha256:ea40ce4404e7cca0724c91a7404da410f0144148fdd58402a5942971e3469b94"},
-    {file = "grpcio-1.59.3-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:83113bcc393477b6f7342b9f48e8a054330c895205517edc66789ceea0796b53"},
-    {file = "grpcio-1.59.3-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:73afbac602b8f1212a50088193601f869b5073efa9855b3e51aaaec97848fc8a"},
-    {file = "grpcio-1.59.3-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:575d61de1950b0b0699917b686b1ca108690702fcc2df127b8c9c9320f93e069"},
-    {file = "grpcio-1.59.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cd76057b5c9a4d68814610ef9226925f94c1231bbe533fdf96f6181f7d2ff9e"},
-    {file = "grpcio-1.59.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:95d6fd804c81efe4879e38bfd84d2b26e339a0a9b797e7615e884ef4686eb47b"},
-    {file = "grpcio-1.59.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:0d42048b8a3286ea4134faddf1f9a59cf98192b94aaa10d910a25613c5eb5bfb"},
-    {file = "grpcio-1.59.3-cp37-cp37m-win_amd64.whl", hash = "sha256:4619fea15c64bcdd9d447cdbdde40e3d5f1da3a2e8ae84103d94a9c1df210d7e"},
-    {file = "grpcio-1.59.3-cp38-cp38-linux_armv7l.whl", hash = "sha256:95b5506e70284ac03b2005dd9ffcb6708c9ae660669376f0192a710687a22556"},
-    {file = "grpcio-1.59.3-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:9e17660947660ccfce56c7869032910c179a5328a77b73b37305cd1ee9301c2e"},
-    {file = "grpcio-1.59.3-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:00912ce19914d038851be5cd380d94a03f9d195643c28e3ad03d355cc02ce7e8"},
-    {file = "grpcio-1.59.3-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e58b3cadaa3c90f1efca26ba33e0d408b35b497307027d3d707e4bcd8de862a6"},
-    {file = "grpcio-1.59.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d787ecadea865bdf78f6679f6f5bf4b984f18f659257ba612979df97a298b3c3"},
-    {file = "grpcio-1.59.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0814942ba1bba269db4e760a34388640c601dece525c6a01f3b4ff030cc0db69"},
-    {file = "grpcio-1.59.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fb111aa99d3180c361a35b5ae1e2c63750220c584a1344229abc139d5c891881"},
-    {file = "grpcio-1.59.3-cp38-cp38-win32.whl", hash = "sha256:eb8ba504c726befe40a356ecbe63c6c3c64c9a439b3164f5a718ec53c9874da0"},
-    {file = "grpcio-1.59.3-cp38-cp38-win_amd64.whl", hash = "sha256:cdbc6b32fadab9bebc6f49d3e7ec4c70983c71e965497adab7f87de218e84391"},
-    {file = "grpcio-1.59.3-cp39-cp39-linux_armv7l.whl", hash = "sha256:c82ca1e4be24a98a253d6dbaa216542e4163f33f38163fc77964b0f0d255b552"},
-    {file = "grpcio-1.59.3-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:36636babfda14f9e9687f28d5b66d349cf88c1301154dc71c6513de2b6c88c59"},
-    {file = "grpcio-1.59.3-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:5f9b2e591da751ac7fdd316cc25afafb7a626dededa9b414f90faad7f3ccebdb"},
-    {file = "grpcio-1.59.3-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a93a82876a4926bf451db82ceb725bd87f42292bacc94586045261f501a86994"},
-    {file = "grpcio-1.59.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce31fa0bfdd1f2bb15b657c16105c8652186eab304eb512e6ae3b99b2fdd7d13"},
-    {file = "grpcio-1.59.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:16da0e40573962dab6cba16bec31f25a4f468e6d05b658e589090fe103b03e3d"},
-    {file = "grpcio-1.59.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d1d1a17372fd425addd5812049fa7374008ffe689585f27f802d0935522cf4b7"},
-    {file = "grpcio-1.59.3-cp39-cp39-win32.whl", hash = "sha256:52cc38a7241b5f7b4a91aaf9000fdd38e26bb00d5e8a71665ce40cfcee716281"},
-    {file = "grpcio-1.59.3-cp39-cp39-win_amd64.whl", hash = "sha256:b491e5bbcad3020a96842040421e508780cade35baba30f402df9d321d1c423e"},
-    {file = "grpcio-1.59.3.tar.gz", hash = "sha256:7800f99568a74a06ebdccd419dd1b6e639b477dcaf6da77ea702f8fb14ce5f80"},
+    {file = "grpcio-1.62.2-cp310-cp310-linux_armv7l.whl", hash = "sha256:66344ea741124c38588a664237ac2fa16dfd226964cca23ddc96bd4accccbde5"},
+    {file = "grpcio-1.62.2-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:5dab7ac2c1e7cb6179c6bfad6b63174851102cbe0682294e6b1d6f0981ad7138"},
+    {file = "grpcio-1.62.2-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:3ad00f3f0718894749d5a8bb0fa125a7980a2f49523731a9b1fabf2b3522aa43"},
+    {file = "grpcio-1.62.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e72ddfee62430ea80133d2cbe788e0d06b12f865765cb24a40009668bd8ea05"},
+    {file = "grpcio-1.62.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:53d3a59a10af4c2558a8e563aed9f256259d2992ae0d3037817b2155f0341de1"},
+    {file = "grpcio-1.62.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a1511a303f8074f67af4119275b4f954189e8313541da7b88b1b3a71425cdb10"},
+    {file = "grpcio-1.62.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b94d41b7412ef149743fbc3178e59d95228a7064c5ab4760ae82b562bdffb199"},
+    {file = "grpcio-1.62.2-cp310-cp310-win32.whl", hash = "sha256:a75af2fc7cb1fe25785be7bed1ab18cef959a376cdae7c6870184307614caa3f"},
+    {file = "grpcio-1.62.2-cp310-cp310-win_amd64.whl", hash = "sha256:80407bc007754f108dc2061e37480238b0dc1952c855e86a4fc283501ee6bb5d"},
+    {file = "grpcio-1.62.2-cp311-cp311-linux_armv7l.whl", hash = "sha256:c1624aa686d4b36790ed1c2e2306cc3498778dffaf7b8dd47066cf819028c3ad"},
+    {file = "grpcio-1.62.2-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:1c1bb80299bdef33309dff03932264636450c8fdb142ea39f47e06a7153d3063"},
+    {file = "grpcio-1.62.2-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:db068bbc9b1fa16479a82e1ecf172a93874540cb84be69f0b9cb9b7ac3c82670"},
+    {file = "grpcio-1.62.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2cc8a308780edbe2c4913d6a49dbdb5befacdf72d489a368566be44cadaef1a"},
+    {file = "grpcio-1.62.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d0695ae31a89f1a8fc8256050329a91a9995b549a88619263a594ca31b76d756"},
+    {file = "grpcio-1.62.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:88b4f9ee77191dcdd8810241e89340a12cbe050be3e0d5f2f091c15571cd3930"},
+    {file = "grpcio-1.62.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:2a0204532aa2f1afd467024b02b4069246320405bc18abec7babab03e2644e75"},
+    {file = "grpcio-1.62.2-cp311-cp311-win32.whl", hash = "sha256:6e784f60e575a0de554ef9251cbc2ceb8790914fe324f11e28450047f264ee6f"},
+    {file = "grpcio-1.62.2-cp311-cp311-win_amd64.whl", hash = "sha256:112eaa7865dd9e6d7c0556c8b04ae3c3a2dc35d62ad3373ab7f6a562d8199200"},
+    {file = "grpcio-1.62.2-cp312-cp312-linux_armv7l.whl", hash = "sha256:65034473fc09628a02fb85f26e73885cf1ed39ebd9cf270247b38689ff5942c5"},
+    {file = "grpcio-1.62.2-cp312-cp312-macosx_10_10_universal2.whl", hash = "sha256:d2c1771d0ee3cf72d69bb5e82c6a82f27fbd504c8c782575eddb7839729fbaad"},
+    {file = "grpcio-1.62.2-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:3abe6838196da518863b5d549938ce3159d809218936851b395b09cad9b5d64a"},
+    {file = "grpcio-1.62.2-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c5ffeb269f10cedb4f33142b89a061acda9f672fd1357331dbfd043422c94e9e"},
+    {file = "grpcio-1.62.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:404d3b4b6b142b99ba1cff0b2177d26b623101ea2ce51c25ef6e53d9d0d87bcc"},
+    {file = "grpcio-1.62.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:262cda97efdabb20853d3b5a4c546a535347c14b64c017f628ca0cc7fa780cc6"},
+    {file = "grpcio-1.62.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:17708db5b11b966373e21519c4c73e5a750555f02fde82276ea2a267077c68ad"},
+    {file = "grpcio-1.62.2-cp312-cp312-win32.whl", hash = "sha256:b7ec9e2f8ffc8436f6b642a10019fc513722858f295f7efc28de135d336ac189"},
+    {file = "grpcio-1.62.2-cp312-cp312-win_amd64.whl", hash = "sha256:aa787b83a3cd5e482e5c79be030e2b4a122ecc6c5c6c4c42a023a2b581fdf17b"},
+    {file = "grpcio-1.62.2-cp37-cp37m-linux_armv7l.whl", hash = "sha256:cfd23ad29bfa13fd4188433b0e250f84ec2c8ba66b14a9877e8bce05b524cf54"},
+    {file = "grpcio-1.62.2-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:af15e9efa4d776dfcecd1d083f3ccfb04f876d613e90ef8432432efbeeac689d"},
+    {file = "grpcio-1.62.2-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:f4aa94361bb5141a45ca9187464ae81a92a2a135ce2800b2203134f7a1a1d479"},
+    {file = "grpcio-1.62.2-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82af3613a219512a28ee5c95578eb38d44dd03bca02fd918aa05603c41018051"},
+    {file = "grpcio-1.62.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55ddaf53474e8caeb29eb03e3202f9d827ad3110475a21245f3c7712022882a9"},
+    {file = "grpcio-1.62.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c79b518c56dddeec79e5500a53d8a4db90da995dfe1738c3ac57fe46348be049"},
+    {file = "grpcio-1.62.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:a5eb4844e5e60bf2c446ef38c5b40d7752c6effdee882f716eb57ae87255d20a"},
+    {file = "grpcio-1.62.2-cp37-cp37m-win_amd64.whl", hash = "sha256:aaae70364a2d1fb238afd6cc9fcb10442b66e397fd559d3f0968d28cc3ac929c"},
+    {file = "grpcio-1.62.2-cp38-cp38-linux_armv7l.whl", hash = "sha256:1bcfe5070e4406f489e39325b76caeadab28c32bf9252d3ae960c79935a4cc36"},
+    {file = "grpcio-1.62.2-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:da6a7b6b938c15fa0f0568e482efaae9c3af31963eec2da4ff13a6d8ec2888e4"},
+    {file = "grpcio-1.62.2-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:41955b641c34db7d84db8d306937b72bc4968eef1c401bea73081a8d6c3d8033"},
+    {file = "grpcio-1.62.2-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c772f225483905f675cb36a025969eef9712f4698364ecd3a63093760deea1bc"},
+    {file = "grpcio-1.62.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:07ce1f775d37ca18c7a141300e5b71539690efa1f51fe17f812ca85b5e73262f"},
+    {file = "grpcio-1.62.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:26f415f40f4a93579fd648f48dca1c13dfacdfd0290f4a30f9b9aeb745026811"},
+    {file = "grpcio-1.62.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:db707e3685ff16fc1eccad68527d072ac8bdd2e390f6daa97bc394ea7de4acea"},
+    {file = "grpcio-1.62.2-cp38-cp38-win32.whl", hash = "sha256:589ea8e75de5fd6df387de53af6c9189c5231e212b9aa306b6b0d4f07520fbb9"},
+    {file = "grpcio-1.62.2-cp38-cp38-win_amd64.whl", hash = "sha256:3c3ed41f4d7a3aabf0f01ecc70d6b5d00ce1800d4af652a549de3f7cf35c4abd"},
+    {file = "grpcio-1.62.2-cp39-cp39-linux_armv7l.whl", hash = "sha256:162ccf61499c893831b8437120600290a99c0bc1ce7b51f2c8d21ec87ff6af8b"},
+    {file = "grpcio-1.62.2-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:f27246d7da7d7e3bd8612f63785a7b0c39a244cf14b8dd9dd2f2fab939f2d7f1"},
+    {file = "grpcio-1.62.2-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:2507006c8a478f19e99b6fe36a2464696b89d40d88f34e4b709abe57e1337467"},
+    {file = "grpcio-1.62.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a90ac47a8ce934e2c8d71e317d2f9e7e6aaceb2d199de940ce2c2eb611b8c0f4"},
+    {file = "grpcio-1.62.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99701979bcaaa7de8d5f60476487c5df8f27483624f1f7e300ff4669ee44d1f2"},
+    {file = "grpcio-1.62.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:af7dc3f7a44f10863b1b0ecab4078f0a00f561aae1edbd01fd03ad4dcf61c9e9"},
+    {file = "grpcio-1.62.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fa63245271920786f4cb44dcada4983a3516be8f470924528cf658731864c14b"},
+    {file = "grpcio-1.62.2-cp39-cp39-win32.whl", hash = "sha256:c6ad9c39704256ed91a1cffc1379d63f7d0278d6a0bad06b0330f5d30291e3a3"},
+    {file = "grpcio-1.62.2-cp39-cp39-win_amd64.whl", hash = "sha256:16da954692fd61aa4941fbeda405a756cd96b97b5d95ca58a92547bba2c1624f"},
+    {file = "grpcio-1.62.2.tar.gz", hash = "sha256:c77618071d96b7a8be2c10701a98537823b9c65ba256c0b9067e0594cdbd954d"},
 ]
 
 [package.extras]
-protobuf = ["grpcio-tools (>=1.59.3)"]
+protobuf = ["grpcio-tools (>=1.62.2)"]
 
 [[package]]
 name = "grpcio-reflection"
@@ -768,45 +819,85 @@ setuptools = "*"
 
 [[package]]
 name = "hf-transfer"
-version = "0.1.4"
+version = "0.1.6"
 description = ""
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "hf_transfer-0.1.4-cp310-cp310-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:6ff5fbde30a5bed35ef8f0d4ba78bde9f6d60a233dbff78a0e4035d6e6f71e4c"},
-    {file = "hf_transfer-0.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1c5c20f76e7f3451cff476b85c55dcb8566ebc94a596cb9eb39c0bb75db8675"},
-    {file = "hf_transfer-0.1.4-cp310-none-win_amd64.whl", hash = "sha256:84c3ce20c68863a7d998711b98726ba9ae8f2e3fc0d685bc2c9ac9833c0f4048"},
-    {file = "hf_transfer-0.1.4-cp311-cp311-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:dab1cf4e2e6fcb963fe0e48e6b5e3a95cf65ee376c7b6618a05dbb2ef0dde183"},
-    {file = "hf_transfer-0.1.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63c9c7aef90facf45391c86131ed00e74333637735cfec52da4f5170004d0b3f"},
-    {file = "hf_transfer-0.1.4-cp311-none-win_amd64.whl", hash = "sha256:eca1fe6ae145e88455d0a174248080498cea52ad45cee50702070b47dffa421f"},
-    {file = "hf_transfer-0.1.4-cp312-cp312-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:d07c0d26b5c01ad50d22ddcff7d30c4e8cbb823565b7f61e0ddb35f7faeae415"},
-    {file = "hf_transfer-0.1.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9b9cf169c3c64883b07f7ded5e3f14ae1d437eb77448738b88c923fc5597c47"},
-    {file = "hf_transfer-0.1.4-cp312-none-win_amd64.whl", hash = "sha256:6b8518b9ebb85b0238745be81f7b88383c7ea216dd8407d46444bcc7806dc0ef"},
-    {file = "hf_transfer-0.1.4-cp37-cp37m-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:ea32e9f91de3f2dad3567577c293f2e81a9309e680def4712ec0c4ea49be6833"},
-    {file = "hf_transfer-0.1.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e81a10dbf2ac534083da06c200456b5d10ba7a1e8c4c5c48f7ea1ca4cf6af474"},
-    {file = "hf_transfer-0.1.4-cp37-none-win_amd64.whl", hash = "sha256:97555bbff69a0459712e5d25d659c0dc74cb8f9726562ca66241f1e1b081f6a9"},
-    {file = "hf_transfer-0.1.4-cp38-cp38-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:38bce7a511952e1b804168e956cd3a3b1ff7e38828259c3cdae27614060b90c5"},
-    {file = "hf_transfer-0.1.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d1977e94e8c8fc8a0e9ce74a651d4694629e526da246a492855fcfb710aa489"},
-    {file = "hf_transfer-0.1.4-cp38-none-win_amd64.whl", hash = "sha256:6ca2d2c40e5e94c5de7e502037ad23ac1d803a2a12760b15b3e3f88c616202bd"},
-    {file = "hf_transfer-0.1.4-cp39-cp39-macosx_10_9_x86_64.macosx_11_0_arm64.macosx_10_9_universal2.whl", hash = "sha256:c04a93acb58e50b8da1e2258185e54f6bf48ba24bf95e470310178b7047c1017"},
-    {file = "hf_transfer-0.1.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3028a807363e0b2c64985c44732ba4ab187a569f013367d2115a6e09ae95031"},
-    {file = "hf_transfer-0.1.4-cp39-none-win_amd64.whl", hash = "sha256:dc9c7c1d0d79fc06baf86d41620623bb6bb2736755329ea6b1ec5faf71e3e36b"},
-    {file = "hf_transfer-0.1.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a466ae2b11d72df9e0005eb8ff7f537d5460c98b64fb6e49f3076ee14040dcf"},
-    {file = "hf_transfer-0.1.4-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb34a023276936d4716112e17daea4ff98afc35b6113dd0f0383710dc208c058"},
-    {file = "hf_transfer-0.1.4-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0647b84d7ff0eee1de6479179a5d43d0695001733f17eecc00153f0f8ab1ac"},
-    {file = "hf_transfer-0.1.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:27d0bc1f8b79a6d65751efbce7eb02d2c1bd7e4de1a46aac18995461590ce4dd"},
-    {file = "hf_transfer-0.1.4.tar.gz", hash = "sha256:687e090639cd52a48dedbfaa9e455a2c99c5169ece3d911f95983b1d4d4c84ed"},
+    {file = "hf_transfer-0.1.6-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:6fd3d61f9229d27def007e53540412507b74ac2fdb1a29985ae0b6a5137749a2"},
+    {file = "hf_transfer-0.1.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b043bb78df1225de043eb041de9d97783fcca14a0bdc1b1d560fc172fc21b648"},
+    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7db60dd18eae4fa6ea157235fb82196cde5313995b396d1b591aad3b790a7f8f"},
+    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:30d31dbab9b5a558cce407b8728e39d87d7af1ef8745ddb90187e9ae0b9e1e90"},
+    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f6b368bddd757efc7af3126ba81f9ac8f9435e2cc00902cb3d64f2be28d8f719"},
+    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa2086d8aefaaa3e144e167324574882004c0cec49bf2d0638ec4b74732d8da0"},
+    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:45d8985a0940bfe1535cb4ca781f5c11e47c83798ef3373ee1f5d57bbe527a9c"},
+    {file = "hf_transfer-0.1.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f42b89735f1cde22f2a795d1f0915741023235666be7de45879e533c7d6010c"},
+    {file = "hf_transfer-0.1.6-cp310-none-win32.whl", hash = "sha256:2d2c4c4613f3ad45b6ce6291e347b2d3ba1b86816635681436567e461cb3c961"},
+    {file = "hf_transfer-0.1.6-cp310-none-win_amd64.whl", hash = "sha256:78b0eed8d8dce60168a46e584b9742b816af127d7e410a713e12c31249195342"},
+    {file = "hf_transfer-0.1.6-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f1d8c172153f9a6cdaecf137612c42796076f61f6bea1072c90ac2e17c1ab6fa"},
+    {file = "hf_transfer-0.1.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2c601996351f90c514a75a0eeb02bf700b1ad1db2d946cbfe4b60b79e29f0b2f"},
+    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e585c808405557d3f5488f385706abb696997bbae262ea04520757e30836d9d"},
+    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ec51af1e8cf4268c268bd88932ade3d7ca895a3c661b42493503f02610ae906b"},
+    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d106fdf996332f6df3ed3fab6d6332df82e8c1fb4b20fd81a491ca4d2ab5616a"},
+    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e9c2ee9e9fde5a0319cc0e8ddfea10897482bc06d5709b10a238f1bc2ebcbc0b"},
+    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f394ea32bc7802b061e549d3133efc523b4ae4fd19bf4b74b183ca6066eef94e"},
+    {file = "hf_transfer-0.1.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4282f09902114cd67fca98a1a1bad569a44521a8395fedf327e966714f68b977"},
+    {file = "hf_transfer-0.1.6-cp311-none-win32.whl", hash = "sha256:276dbf307d5ab6f1bcbf57b5918bfcf9c59d6848ccb28242349e1bb5985f983b"},
+    {file = "hf_transfer-0.1.6-cp311-none-win_amd64.whl", hash = "sha256:fa475175c51451186bea804471995fa8e7b2a48a61dcca55534911dc25955527"},
+    {file = "hf_transfer-0.1.6-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:23d157a67acfa00007799323a1c441b2bbacc7dee625b016b7946fe0e25e6c89"},
+    {file = "hf_transfer-0.1.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6067342a2864b988f861cd2d31bd78eb1e84d153a3f6df38485b6696d9ad3013"},
+    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91cfcb3070e205b58fa8dc8bcb6a62ccc40913fcdb9cd1ff7c364c8e3aa85345"},
+    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb76064ac5165d5eeaaf8d0903e8bf55477221ecc2a4a4d69f0baca065ab905b"},
+    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dabd3a177d83028f164984cf4dd859f77ec1e20c97a6f307ff8fcada0785ef1"},
+    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d0bf4254e44f64a26e0a5b73b5d7e8d91bb36870718fb4f8e126ec943ff4c805"},
+    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d32c1b106f38f336ceb21531f4db9b57d777b9a33017dafdb6a5316388ebe50"},
+    {file = "hf_transfer-0.1.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff05aba3c83921e5c7635ba9f07c693cc893350c447644824043aeac27b285f5"},
+    {file = "hf_transfer-0.1.6-cp312-none-win32.whl", hash = "sha256:051ef0c55607652cb5974f59638da035773254b9a07d7ee5b574fe062de4c9d1"},
+    {file = "hf_transfer-0.1.6-cp312-none-win_amd64.whl", hash = "sha256:716fb5c574fcbdd8092ce73f9b6c66f42e3544337490f77c60ec07df02bd081b"},
+    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c0c981134a55965e279cb7be778c1ccaf93f902fc9ebe31da4f30caf824cc4d"},
+    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ef1f145f04c5b573915bcb1eb5db4039c74f6b46fce73fc473c4287e613b623"},
+    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0a7609b004db3347dbb7796df45403eceb171238210d054d93897d6d84c63a4"},
+    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:60f0864bf5996773dbd5f8ae4d1649041f773fe9d5769f4c0eeb5553100acef3"},
+    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d01e55d630ffe70a4f5d0ed576a04c6a48d7c65ca9a7d18f2fca385f20685a9"},
+    {file = "hf_transfer-0.1.6-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d855946c5062b665190de15b2bdbd4c8eddfee35350bfb7564592e23d36fbbd3"},
+    {file = "hf_transfer-0.1.6-cp37-none-win32.whl", hash = "sha256:fd40b2409cfaf3e8aba20169ee09552f69140e029adeec261b988903ff0c8f6f"},
+    {file = "hf_transfer-0.1.6-cp37-none-win_amd64.whl", hash = "sha256:0e0eba49d46d3b5481919aea0794aec625fbc6ecdf13fe7e0e9f3fc5d5ad5971"},
+    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e669fecb29fc454449739f9f53ed9253197e7c19e6a6eaa0f08334207af4287"},
+    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:89f701802892e5eb84f89f402686861f87dc227d6082b05f4e9d9b4e8015a3c3"},
+    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b6f2b0c8b95b01409275d789a9b74d5f2e146346f985d384bf50ec727caf1ccc"},
+    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa855a2fa262792a230f9efcdb5da6d431b747d1861d2a69fe7834b19aea077e"},
+    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa8ca349afb2f0713475426946261eb2035e4efb50ebd2c1d5ad04f395f4217"},
+    {file = "hf_transfer-0.1.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01255f043996bc7d1bae62d8afc5033a90c7e36ce308b988eeb84afe0a69562f"},
+    {file = "hf_transfer-0.1.6-cp38-none-win32.whl", hash = "sha256:60b1db183e8a7540cd4f8b2160ff4de55f77cb0c3fc6a10be1e7c30eb1b2bdeb"},
+    {file = "hf_transfer-0.1.6-cp38-none-win_amd64.whl", hash = "sha256:fb8be3cba6aaa50ab2e9dffbd25c8eb2046785eeff642cf0cdd0dd9ae6be3539"},
+    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d09af35e3e3f09b664e6429e9a0dc200f29c5bdfd88bdd9666de51183b1fe202"},
+    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a4505bd707cc14d85c800f961fad8ca76f804a8ad22fbb7b1a217d8d0c15e6a5"},
+    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c453fd8b0be9740faa23cecd1f28ee9ead7d900cefa64ff836960c503a744c9"},
+    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:13cb8884e718a78c3b81a8cdec9c7ac196dd42961fce55c3ccff3dd783e5ad7a"},
+    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39cd39df171a2b5404de69c4e6cd14eee47f6fe91c1692f939bfb9e59a0110d8"},
+    {file = "hf_transfer-0.1.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ff0629ee9f98df57a783599602eb498f9ec3619dc69348b12e4d9d754abf0e9"},
+    {file = "hf_transfer-0.1.6-cp39-none-win32.whl", hash = "sha256:164a6ce445eb0cc7c645f5b6e1042c003d33292520c90052b6325f30c98e4c5f"},
+    {file = "hf_transfer-0.1.6-cp39-none-win_amd64.whl", hash = "sha256:11b8b4b73bf455f13218c5f827698a30ae10998ca31b8264b51052868c7a9f11"},
+    {file = "hf_transfer-0.1.6-pp310-pypy310_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:16957ba057376a99ea361074ce1094f61b58e769defa6be2422ae59c0b6a6530"},
+    {file = "hf_transfer-0.1.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7db952112e3b8ee1a5cbf500d2443e9ce4fb893281c5310a3e31469898628005"},
+    {file = "hf_transfer-0.1.6-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d39d826a7344f5e39f438d62632acd00467aa54a083b66496f61ef67a9885a56"},
+    {file = "hf_transfer-0.1.6-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4e2653fbfa92e7651db73d99b697c8684e7345c479bd6857da80bed6138abb2"},
+    {file = "hf_transfer-0.1.6-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:144277e6a86add10b90ec3b583253aec777130312256bfc8d5ade5377e253807"},
+    {file = "hf_transfer-0.1.6-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bb53bcd16365313b2aa0dbdc28206f577d70770f31249cdabc387ac5841edcc"},
+    {file = "hf_transfer-0.1.6-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:990d73a5a68d8261980f146c51f4c5f9995314011cb225222021ad7c39f3af2d"},
+    {file = "hf_transfer-0.1.6-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:652406037029ab9b4097b4c5f29321bad5f64c2b46fbff142509d918aec87c29"},
+    {file = "hf_transfer-0.1.6.tar.gz", hash = "sha256:deb505a7d417d7055fd7b3549eadb91dfe782941261f3344025c486c16d1d2f9"},
 ]
 
 [[package]]
 name = "huggingface-hub"
-version = "0.20.3"
+version = "0.22.2"
 description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "huggingface_hub-0.20.3-py3-none-any.whl", hash = "sha256:d988ae4f00d3e307b0c80c6a05ca6dbb7edba8bba3079f74cda7d9c2e562a7b6"},
-    {file = "huggingface_hub-0.20.3.tar.gz", hash = "sha256:94e7f8e074475fbc67d6a71957b678e1b4a74ff1b64a644fd6cbb83da962d05d"},
+    {file = "huggingface_hub-0.22.2-py3-none-any.whl", hash = "sha256:3429e25f38ccb834d310804a3b711e7e4953db5a9e420cc147a5e194ca90fd17"},
+    {file = "huggingface_hub-0.22.2.tar.gz", hash = "sha256:32e9a9a6843c92f253ff9ca16b9985def4d80a93fb357af5353f770ef74a81be"},
 ]
 
 [package.dependencies]
@@ -819,15 +910,17 @@ tqdm = ">=4.42.1"
 typing-extensions = ">=3.7.4.3"
 
 [package.extras]
-all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.1.3)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
+all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
 cli = ["InquirerPy (==0.3.4)"]
-dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "mypy (==1.5.1)", "numpy", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.1.3)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
+dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "minijinja (>=1.0)", "mypy (==1.5.1)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "ruff (>=0.3.0)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)", "urllib3 (<2.0)"]
 fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"]
-inference = ["aiohttp", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)"]
-quality = ["mypy (==1.5.1)", "ruff (>=0.1.3)"]
+hf-transfer = ["hf-transfer (>=0.1.4)"]
+inference = ["aiohttp", "minijinja (>=1.0)"]
+quality = ["mypy (==1.5.1)", "ruff (>=0.3.0)"]
 tensorflow = ["graphviz", "pydot", "tensorflow"]
-testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic (>1.1,<2.0)", "pydantic (>1.1,<3.0)", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
-torch = ["torch"]
+tensorflow-testing = ["keras (<3.0)", "tensorflow"]
+testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "minijinja (>=1.0)", "numpy", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-rerunfailures", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"]
+torch = ["safetensors", "torch"]
 typing = ["types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "typing-extensions (>=4.8.0)"]
 
 [[package]]
@@ -846,33 +939,33 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve
 
 [[package]]
 name = "idna"
-version = "3.4"
+version = "3.7"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
 python-versions = ">=3.5"
 files = [
-    {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
-    {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
+    {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"},
+    {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"},
 ]
 
 [[package]]
 name = "importlib-metadata"
-version = "7.0.1"
+version = "7.1.0"
 description = "Read metadata from Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "importlib_metadata-7.0.1-py3-none-any.whl", hash = "sha256:4805911c3a4ec7c3966410053e9ec6a1fecd629117df5adee56dfc9432a1081e"},
-    {file = "importlib_metadata-7.0.1.tar.gz", hash = "sha256:f238736bb06590ae52ac1fab06a3a9ef1d8dce2b7a35b5ab329371d6c8f5d2cc"},
+    {file = "importlib_metadata-7.1.0-py3-none-any.whl", hash = "sha256:30962b96c0c223483ed6cc7280e7f0199feb01a0e40cfae4d4450fc6fab1f570"},
+    {file = "importlib_metadata-7.1.0.tar.gz", hash = "sha256:b78938b926ee8d5f020fc4772d487045805a55ddbad2ecf21c6d60938dc7fcd2"},
 ]
 
 [package.dependencies]
 zipp = ">=0.5"
 
 [package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
 perf = ["ipython"]
-testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"]
+testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"]
 
 [[package]]
 name = "iniconfig"
@@ -885,15 +978,40 @@ files = [
     {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
 ]
 
+[[package]]
+name = "intel-openmp"
+version = "2021.4.0"
+description = "Intel OpenMP* Runtime Library"
+optional = false
+python-versions = "*"
+files = [
+    {file = "intel_openmp-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:41c01e266a7fdb631a7609191709322da2bbf24b252ba763f125dd651bcc7675"},
+    {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:3b921236a38384e2016f0f3d65af6732cf2c12918087128a9163225451e776f2"},
+    {file = "intel_openmp-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:e2240ab8d01472fed04f3544a878cda5da16c26232b7ea1b59132dbfb48b186e"},
+    {file = "intel_openmp-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:6e863d8fd3d7e8ef389d52cf97a50fe2afe1a19247e8c0d168ce021546f96fc9"},
+    {file = "intel_openmp-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:eef4c8bcc8acefd7f5cd3b9384dbf73d59e2c99fc56545712ded913f43c4a94f"},
+]
+
+[[package]]
+name = "interegular"
+version = "0.3.3"
+description = "a regex intersection checker"
+optional = true
+python-versions = ">=3.7"
+files = [
+    {file = "interegular-0.3.3-py37-none-any.whl", hash = "sha256:b0c07007d48c89d6d19f7204972d369b2a77222722e126b6aa63aa721dc3b19c"},
+    {file = "interegular-0.3.3.tar.gz", hash = "sha256:d9b697b21b34884711399ba0f0376914b81899ce670032486d0d048344a76600"},
+]
+
 [[package]]
 name = "jinja2"
-version = "3.1.2"
+version = "3.1.3"
 description = "A very fast and expressive template engine."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"},
-    {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"},
+    {file = "Jinja2-3.1.3-py3-none-any.whl", hash = "sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa"},
+    {file = "Jinja2-3.1.3.tar.gz", hash = "sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90"},
 ]
 
 [package.dependencies]
@@ -902,6 +1020,99 @@ MarkupSafe = ">=2.0"
 [package.extras]
 i18n = ["Babel (>=2.7)"]
 
+[[package]]
+name = "joblib"
+version = "1.4.0"
+description = "Lightweight pipelining with Python functions"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "joblib-1.4.0-py3-none-any.whl", hash = "sha256:42942470d4062537be4d54c83511186da1fc14ba354961a2114da91efa9a4ed7"},
+    {file = "joblib-1.4.0.tar.gz", hash = "sha256:1eb0dc091919cd384490de890cb5dfd538410a6d4b3b54eef09fb8c50b409b1c"},
+]
+
+[[package]]
+name = "jsonschema"
+version = "4.21.1"
+description = "An implementation of JSON Schema validation for Python"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "jsonschema-4.21.1-py3-none-any.whl", hash = "sha256:7996507afae316306f9e2290407761157c6f78002dcf7419acb99822143d1c6f"},
+    {file = "jsonschema-4.21.1.tar.gz", hash = "sha256:85727c00279f5fa6bedbe6238d2aa6403bedd8b4864ab11207d07df3cc1b2ee5"},
+]
+
+[package.dependencies]
+attrs = ">=22.2.0"
+jsonschema-specifications = ">=2023.03.6"
+referencing = ">=0.28.4"
+rpds-py = ">=0.7.1"
+
+[package.extras]
+format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
+format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
+
+[[package]]
+name = "jsonschema-specifications"
+version = "2023.12.1"
+description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "jsonschema_specifications-2023.12.1-py3-none-any.whl", hash = "sha256:87e4fdf3a94858b8a2ba2778d9ba57d8a9cafca7c7489c46ba0d30a8bc6a9c3c"},
+    {file = "jsonschema_specifications-2023.12.1.tar.gz", hash = "sha256:48a76787b3e70f5ed53f1160d2b81f586e4ca6d1548c5de7085d1682674764cc"},
+]
+
+[package.dependencies]
+referencing = ">=0.31.0"
+
+[[package]]
+name = "lark"
+version = "1.1.9"
+description = "a modern parsing library"
+optional = true
+python-versions = ">=3.6"
+files = [
+    {file = "lark-1.1.9-py3-none-any.whl", hash = "sha256:a0dd3a87289f8ccbb325901e4222e723e7d745dbfc1803eaf5f3d2ace19cf2db"},
+    {file = "lark-1.1.9.tar.gz", hash = "sha256:15fa5236490824c2c4aba0e22d2d6d823575dcaf4cdd1848e34b6ad836240fba"},
+]
+
+[package.extras]
+atomic-cache = ["atomicwrites"]
+interegular = ["interegular (>=0.3.1,<0.4.0)"]
+nearley = ["js2py"]
+regex = ["regex"]
+
+[[package]]
+name = "llvmlite"
+version = "0.42.0"
+description = "lightweight wrapper around basic LLVM functionality"
+optional = true
+python-versions = ">=3.9"
+files = [
+    {file = "llvmlite-0.42.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3366938e1bf63d26c34fbfb4c8e8d2ded57d11e0567d5bb243d89aab1eb56098"},
+    {file = "llvmlite-0.42.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c35da49666a21185d21b551fc3caf46a935d54d66969d32d72af109b5e7d2b6f"},
+    {file = "llvmlite-0.42.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70f44ccc3c6220bd23e0ba698a63ec2a7d3205da0d848804807f37fc243e3f77"},
+    {file = "llvmlite-0.42.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:763f8d8717a9073b9e0246998de89929071d15b47f254c10eef2310b9aac033d"},
+    {file = "llvmlite-0.42.0-cp310-cp310-win_amd64.whl", hash = "sha256:8d90edf400b4ceb3a0e776b6c6e4656d05c7187c439587e06f86afceb66d2be5"},
+    {file = "llvmlite-0.42.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ae511caed28beaf1252dbaf5f40e663f533b79ceb408c874c01754cafabb9cbf"},
+    {file = "llvmlite-0.42.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:81e674c2fe85576e6c4474e8c7e7aba7901ac0196e864fe7985492b737dbab65"},
+    {file = "llvmlite-0.42.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb3975787f13eb97629052edb5017f6c170eebc1c14a0433e8089e5db43bcce6"},
+    {file = "llvmlite-0.42.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5bece0cdf77f22379f19b1959ccd7aee518afa4afbd3656c6365865f84903f9"},
+    {file = "llvmlite-0.42.0-cp311-cp311-win_amd64.whl", hash = "sha256:7e0c4c11c8c2aa9b0701f91b799cb9134a6a6de51444eff5a9087fc7c1384275"},
+    {file = "llvmlite-0.42.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:08fa9ab02b0d0179c688a4216b8939138266519aaa0aa94f1195a8542faedb56"},
+    {file = "llvmlite-0.42.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:b2fce7d355068494d1e42202c7aff25d50c462584233013eb4470c33b995e3ee"},
+    {file = "llvmlite-0.42.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebe66a86dc44634b59a3bc860c7b20d26d9aaffcd30364ebe8ba79161a9121f4"},
+    {file = "llvmlite-0.42.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d47494552559e00d81bfb836cf1c4d5a5062e54102cc5767d5aa1e77ccd2505c"},
+    {file = "llvmlite-0.42.0-cp312-cp312-win_amd64.whl", hash = "sha256:05cb7e9b6ce69165ce4d1b994fbdedca0c62492e537b0cc86141b6e2c78d5888"},
+    {file = "llvmlite-0.42.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bdd3888544538a94d7ec99e7c62a0cdd8833609c85f0c23fcb6c5c591aec60ad"},
+    {file = "llvmlite-0.42.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0936c2067a67fb8816c908d5457d63eba3e2b17e515c5fe00e5ee2bace06040"},
+    {file = "llvmlite-0.42.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a78ab89f1924fc11482209f6799a7a3fc74ddc80425a7a3e0e8174af0e9e2301"},
+    {file = "llvmlite-0.42.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d7599b65c7af7abbc978dbf345712c60fd596aa5670496561cc10e8a71cebfb2"},
+    {file = "llvmlite-0.42.0-cp39-cp39-win_amd64.whl", hash = "sha256:43d65cc4e206c2e902c1004dd5418417c4efa6c1d04df05c6c5675a27e8ca90e"},
+    {file = "llvmlite-0.42.0.tar.gz", hash = "sha256:f92b09243c0cc3f457da8b983f67bd8e1295d0f5b3746c7a1861d7a99403854a"},
+]
+
 [[package]]
 name = "loguru"
 version = "0.6.0"
@@ -922,73 +1133,91 @@ dev = ["Sphinx (>=4.1.1)", "black (>=19.10b0)", "colorama (>=0.3.4)", "docutils
 
 [[package]]
 name = "markupsafe"
-version = "2.1.3"
+version = "2.1.5"
 description = "Safely add untrusted strings to HTML/XML markup."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"},
-    {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"},
-    {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f698de3fd0c4e6972b92290a45bd9b1536bffe8c6759c62471efaa8acb4c37bc"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aa57bd9cf8ae831a362185ee444e15a93ecb2e344c8e52e4d721ea3ab6ef1823"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffcc3f7c66b5f5b7931a5aa68fc9cecc51e685ef90282f4a82f0f5e9b704ad11"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47d4f1c5f80fc62fdd7777d0d40a2e9dda0a05883ab11374334f6c4de38adffd"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1f67c7038d560d92149c060157d623c542173016c4babc0c1913cca0564b9939"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9aad3c1755095ce347e26488214ef77e0485a3c34a50c5a5e2471dff60b9dd9c"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:14ff806850827afd6b07a5f32bd917fb7f45b046ba40c57abdb636674a8b559c"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f9293864fe09b8149f0cc42ce56e3f0e54de883a9de90cd427f191c346eb2e1"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-win32.whl", hash = "sha256:715d3562f79d540f251b99ebd6d8baa547118974341db04f5ad06d5ea3eb8007"},
-    {file = "MarkupSafe-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:1b8dd8c3fd14349433c79fa8abeb573a55fc0fdd769133baac1f5e07abf54aeb"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"},
-    {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"},
-    {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"},
-    {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"},
-    {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"},
+    {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a17a92de5231666cfbe003f0e4b9b3a7ae3afb1ec2845aadc2bacc93ff85febc"},
+    {file = "MarkupSafe-2.1.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:72b6be590cc35924b02c78ef34b467da4ba07e4e0f0454a2c5907f473fc50ce5"},
+    {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e61659ba32cf2cf1481e575d0462554625196a1f2fc06a1c777d3f48e8865d46"},
+    {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2174c595a0d73a3080ca3257b40096db99799265e1c27cc5a610743acd86d62f"},
+    {file = "MarkupSafe-2.1.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae2ad8ae6ebee9d2d94b17fb62763125f3f374c25618198f40cbb8b525411900"},
+    {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:075202fa5b72c86ad32dc7d0b56024ebdbcf2048c0ba09f1cde31bfdd57bcfff"},
+    {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:598e3276b64aff0e7b3451b72e94fa3c238d452e7ddcd893c3ab324717456bad"},
+    {file = "MarkupSafe-2.1.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fce659a462a1be54d2ffcacea5e3ba2d74daa74f30f5f143fe0c58636e355fdd"},
+    {file = "MarkupSafe-2.1.5-cp310-cp310-win32.whl", hash = "sha256:d9fad5155d72433c921b782e58892377c44bd6252b5af2f67f16b194987338a4"},
+    {file = "MarkupSafe-2.1.5-cp310-cp310-win_amd64.whl", hash = "sha256:bf50cd79a75d181c9181df03572cdce0fbb75cc353bc350712073108cba98de5"},
+    {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:629ddd2ca402ae6dbedfceeba9c46d5f7b2a61d9749597d4307f943ef198fc1f"},
+    {file = "MarkupSafe-2.1.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5b7b716f97b52c5a14bffdf688f971b2d5ef4029127f1ad7a513973cfd818df2"},
+    {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ec585f69cec0aa07d945b20805be741395e28ac1627333b1c5b0105962ffced"},
+    {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b91c037585eba9095565a3556f611e3cbfaa42ca1e865f7b8015fe5c7336d5a5"},
+    {file = "MarkupSafe-2.1.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7502934a33b54030eaf1194c21c692a534196063db72176b0c4028e140f8f32c"},
+    {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0e397ac966fdf721b2c528cf028494e86172b4feba51d65f81ffd65c63798f3f"},
+    {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:c061bb86a71b42465156a3ee7bd58c8c2ceacdbeb95d05a99893e08b8467359a"},
+    {file = "MarkupSafe-2.1.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a57fdd7ce31c7ff06cdfbf31dafa96cc533c21e443d57f5b1ecc6cdc668ec7f"},
+    {file = "MarkupSafe-2.1.5-cp311-cp311-win32.whl", hash = "sha256:397081c1a0bfb5124355710fe79478cdbeb39626492b15d399526ae53422b906"},
+    {file = "MarkupSafe-2.1.5-cp311-cp311-win_amd64.whl", hash = "sha256:2b7c57a4dfc4f16f7142221afe5ba4e093e09e728ca65c51f5620c9aaeb9a617"},
+    {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:8dec4936e9c3100156f8a2dc89c4b88d5c435175ff03413b443469c7c8c5f4d1"},
+    {file = "MarkupSafe-2.1.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:3c6b973f22eb18a789b1460b4b91bf04ae3f0c4234a0a6aa6b0a92f6f7b951d4"},
+    {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac07bad82163452a6884fe8fa0963fb98c2346ba78d779ec06bd7a6262132aee"},
+    {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5dfb42c4604dddc8e4305050aa6deb084540643ed5804d7455b5df8fe16f5e5"},
+    {file = "MarkupSafe-2.1.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ea3d8a3d18833cf4304cd2fc9cbb1efe188ca9b5efef2bdac7adc20594a0e46b"},
+    {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d050b3361367a06d752db6ead6e7edeb0009be66bc3bae0ee9d97fb326badc2a"},
+    {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bec0a414d016ac1a18862a519e54b2fd0fc8bbfd6890376898a6c0891dd82e9f"},
+    {file = "MarkupSafe-2.1.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:58c98fee265677f63a4385256a6d7683ab1832f3ddd1e66fe948d5880c21a169"},
+    {file = "MarkupSafe-2.1.5-cp312-cp312-win32.whl", hash = "sha256:8590b4ae07a35970728874632fed7bd57b26b0102df2d2b233b6d9d82f6c62ad"},
+    {file = "MarkupSafe-2.1.5-cp312-cp312-win_amd64.whl", hash = "sha256:823b65d8706e32ad2df51ed89496147a42a2a6e01c13cfb6ffb8b1e92bc910bb"},
+    {file = "MarkupSafe-2.1.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c8b29db45f8fe46ad280a7294f5c3ec36dbac9491f2d1c17345be8e69cc5928f"},
+    {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ec6a563cff360b50eed26f13adc43e61bc0c04d94b8be985e6fb24b81f6dcfdf"},
+    {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a549b9c31bec33820e885335b451286e2969a2d9e24879f83fe904a5ce59d70a"},
+    {file = "MarkupSafe-2.1.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4f11aa001c540f62c6166c7726f71f7573b52c68c31f014c25cc7901deea0b52"},
+    {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:7b2e5a267c855eea6b4283940daa6e88a285f5f2a67f2220203786dfa59b37e9"},
+    {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:2d2d793e36e230fd32babe143b04cec8a8b3eb8a3122d2aceb4a371e6b09b8df"},
+    {file = "MarkupSafe-2.1.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ce409136744f6521e39fd8e2a24c53fa18ad67aa5bc7c2cf83645cce5b5c4e50"},
+    {file = "MarkupSafe-2.1.5-cp37-cp37m-win32.whl", hash = "sha256:4096e9de5c6fdf43fb4f04c26fb114f61ef0bf2e5604b6ee3019d51b69e8c371"},
+    {file = "MarkupSafe-2.1.5-cp37-cp37m-win_amd64.whl", hash = "sha256:4275d846e41ecefa46e2015117a9f491e57a71ddd59bbead77e904dc02b1bed2"},
+    {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:656f7526c69fac7f600bd1f400991cc282b417d17539a1b228617081106feb4a"},
+    {file = "MarkupSafe-2.1.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:97cafb1f3cbcd3fd2b6fbfb99ae11cdb14deea0736fc2b0952ee177f2b813a46"},
+    {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f3fbcb7ef1f16e48246f704ab79d79da8a46891e2da03f8783a5b6fa41a9532"},
+    {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa9db3f79de01457b03d4f01b34cf91bc0048eb2c3846ff26f66687c2f6d16ab"},
+    {file = "MarkupSafe-2.1.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffee1f21e5ef0d712f9033568f8344d5da8cc2869dbd08d87c84656e6a2d2f68"},
+    {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5dedb4db619ba5a2787a94d877bc8ffc0566f92a01c0ef214865e54ecc9ee5e0"},
+    {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:30b600cf0a7ac9234b2638fbc0fb6158ba5bdcdf46aeb631ead21248b9affbc4"},
+    {file = "MarkupSafe-2.1.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8dd717634f5a044f860435c1d8c16a270ddf0ef8588d4887037c5028b859b0c3"},
+    {file = "MarkupSafe-2.1.5-cp38-cp38-win32.whl", hash = "sha256:daa4ee5a243f0f20d528d939d06670a298dd39b1ad5f8a72a4275124a7819eff"},
+    {file = "MarkupSafe-2.1.5-cp38-cp38-win_amd64.whl", hash = "sha256:619bc166c4f2de5caa5a633b8b7326fbe98e0ccbfacabd87268a2b15ff73a029"},
+    {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7a68b554d356a91cce1236aa7682dc01df0edba8d043fd1ce607c49dd3c1edcf"},
+    {file = "MarkupSafe-2.1.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:db0b55e0f3cc0be60c1f19efdde9a637c32740486004f20d1cff53c3c0ece4d2"},
+    {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3e53af139f8579a6d5f7b76549125f0d94d7e630761a2111bc431fd820e163b8"},
+    {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:17b950fccb810b3293638215058e432159d2b71005c74371d784862b7e4683f3"},
+    {file = "MarkupSafe-2.1.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c31f53cdae6ecfa91a77820e8b151dba54ab528ba65dfd235c80b086d68a465"},
+    {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bff1b4290a66b490a2f4719358c0cdcd9bafb6b8f061e45c7a2460866bf50c2e"},
+    {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bc1667f8b83f48511b94671e0e441401371dfd0f0a795c7daa4a3cd1dde55bea"},
+    {file = "MarkupSafe-2.1.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5049256f536511ee3f7e1b3f87d1d1209d327e818e6ae1365e8653d7e3abb6a6"},
+    {file = "MarkupSafe-2.1.5-cp39-cp39-win32.whl", hash = "sha256:00e046b6dd71aa03a41079792f8473dc494d564611a8f89bbbd7cb93295ebdcf"},
+    {file = "MarkupSafe-2.1.5-cp39-cp39-win_amd64.whl", hash = "sha256:fa173ec60341d6bb97a89f5ea19c85c5643c1e7dedebc22f5181eb73573142c5"},
+    {file = "MarkupSafe-2.1.5.tar.gz", hash = "sha256:d283d37a890ba4c1ae73ffadf8046435c76e7bc2247bbb63c00bd1a709c6544b"},
 ]
 
+[[package]]
+name = "mkl"
+version = "2021.4.0"
+description = "Intel® oneAPI Math Kernel Library"
+optional = false
+python-versions = "*"
+files = [
+    {file = "mkl-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:67460f5cd7e30e405b54d70d1ed3ca78118370b65f7327d495e9c8847705e2fb"},
+    {file = "mkl-2021.4.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:636d07d90e68ccc9630c654d47ce9fdeb036bb46e2b193b3a9ac8cfea683cce5"},
+    {file = "mkl-2021.4.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:398dbf2b0d12acaf54117a5210e8f191827f373d362d796091d161f610c1ebfb"},
+    {file = "mkl-2021.4.0-py2.py3-none-win32.whl", hash = "sha256:439c640b269a5668134e3dcbcea4350459c4a8bc46469669b2d67e07e3d330e8"},
+    {file = "mkl-2021.4.0-py2.py3-none-win_amd64.whl", hash = "sha256:ceef3cafce4c009dd25f65d7ad0d833a0fbadc3d8903991ec92351fe5de1e718"},
+]
+
+[package.dependencies]
+intel-openmp = "==2021.*"
+tbb = "==2021.*"
+
 [[package]]
 name = "mpmath"
 version = "1.3.0"
@@ -1008,114 +1237,137 @@ tests = ["pytest (>=4.6)"]
 
 [[package]]
 name = "multidict"
-version = "6.0.4"
+version = "6.0.5"
 description = "multidict implementation"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"},
-    {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"},
-    {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"},
-    {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"},
-    {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"},
-    {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"},
-    {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"},
-    {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"},
-    {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"},
-    {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"},
-    {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"},
-    {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"},
-    {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"},
-    {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"},
-    {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"},
-    {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"},
-    {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"},
-    {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"},
-    {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"},
-    {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"},
-    {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"},
-    {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"},
-    {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"},
-    {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"},
-    {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"},
-    {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"},
-    {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"},
-    {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"},
-    {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"},
-    {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"},
-    {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"},
-    {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"},
-    {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"},
-    {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"},
-    {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"},
-    {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"},
-    {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"},
-    {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"},
-    {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"},
-    {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"},
-    {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"},
-    {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"},
-    {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"},
-    {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"},
-    {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"},
-    {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"},
-    {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"},
-    {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"},
-    {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"},
-    {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"},
-    {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"},
-    {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"},
-    {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"},
-    {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"},
-    {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"},
-    {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"},
-    {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"},
-    {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"},
-    {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"},
-    {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"},
-    {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"},
-    {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"},
-    {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"},
-    {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"},
-    {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"},
-    {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"},
-    {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"},
-    {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"},
-    {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"},
-    {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"},
-    {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"},
-    {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"},
-    {file = "multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"},
-    {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"},
+    {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b644ae063c10e7f324ab1ab6b548bdf6f8b47f3ec234fef1093bc2735e5f9"},
+    {file = "multidict-6.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:896ebdcf62683551312c30e20614305f53125750803b614e9e6ce74a96232604"},
+    {file = "multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:411bf8515f3be9813d06004cac41ccf7d1cd46dfe233705933dd163b60e37600"},
+    {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d147090048129ce3c453f0292e7697d333db95e52616b3793922945804a433c"},
+    {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:215ed703caf15f578dca76ee6f6b21b7603791ae090fbf1ef9d865571039ade5"},
+    {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c6390cf87ff6234643428991b7359b5f59cc15155695deb4eda5c777d2b880f"},
+    {file = "multidict-6.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21fd81c4ebdb4f214161be351eb5bcf385426bf023041da2fd9e60681f3cebae"},
+    {file = "multidict-6.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3cc2ad10255f903656017363cd59436f2111443a76f996584d1077e43ee51182"},
+    {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:6939c95381e003f54cd4c5516740faba40cf5ad3eeff460c3ad1d3e0ea2549bf"},
+    {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:220dd781e3f7af2c2c1053da9fa96d9cf3072ca58f057f4c5adaaa1cab8fc442"},
+    {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:766c8f7511df26d9f11cd3a8be623e59cca73d44643abab3f8c8c07620524e4a"},
+    {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fe5d7785250541f7f5019ab9cba2c71169dc7d74d0f45253f8313f436458a4ef"},
+    {file = "multidict-6.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c1c1496e73051918fcd4f58ff2e0f2f3066d1c76a0c6aeffd9b45d53243702cc"},
+    {file = "multidict-6.0.5-cp310-cp310-win32.whl", hash = "sha256:7afcdd1fc07befad18ec4523a782cde4e93e0a2bf71239894b8d61ee578c1319"},
+    {file = "multidict-6.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:99f60d34c048c5c2fabc766108c103612344c46e35d4ed9ae0673d33c8fb26e8"},
+    {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f285e862d2f153a70586579c15c44656f888806ed0e5b56b64489afe4a2dbfba"},
+    {file = "multidict-6.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:53689bb4e102200a4fafa9de9c7c3c212ab40a7ab2c8e474491914d2305f187e"},
+    {file = "multidict-6.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:612d1156111ae11d14afaf3a0669ebf6c170dbb735e510a7438ffe2369a847fd"},
+    {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7be7047bd08accdb7487737631d25735c9a04327911de89ff1b26b81745bd4e3"},
+    {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de170c7b4fe6859beb8926e84f7d7d6c693dfe8e27372ce3b76f01c46e489fcf"},
+    {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04bde7a7b3de05732a4eb39c94574db1ec99abb56162d6c520ad26f83267de29"},
+    {file = "multidict-6.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85f67aed7bb647f93e7520633d8f51d3cbc6ab96957c71272b286b2f30dc70ed"},
+    {file = "multidict-6.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:425bf820055005bfc8aa9a0b99ccb52cc2f4070153e34b701acc98d201693733"},
+    {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d3eb1ceec286eba8220c26f3b0096cf189aea7057b6e7b7a2e60ed36b373b77f"},
+    {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:7901c05ead4b3fb75113fb1dd33eb1253c6d3ee37ce93305acd9d38e0b5f21a4"},
+    {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e0e79d91e71b9867c73323a3444724d496c037e578a0e1755ae159ba14f4f3d1"},
+    {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:29bfeb0dff5cb5fdab2023a7a9947b3b4af63e9c47cae2a10ad58394b517fddc"},
+    {file = "multidict-6.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e030047e85cbcedbfc073f71836d62dd5dadfbe7531cae27789ff66bc551bd5e"},
+    {file = "multidict-6.0.5-cp311-cp311-win32.whl", hash = "sha256:2f4848aa3baa109e6ab81fe2006c77ed4d3cd1e0ac2c1fbddb7b1277c168788c"},
+    {file = "multidict-6.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:2faa5ae9376faba05f630d7e5e6be05be22913782b927b19d12b8145968a85ea"},
+    {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:51d035609b86722963404f711db441cf7134f1889107fb171a970c9701f92e1e"},
+    {file = "multidict-6.0.5-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:cbebcd5bcaf1eaf302617c114aa67569dd3f090dd0ce8ba9e35e9985b41ac35b"},
+    {file = "multidict-6.0.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2ffc42c922dbfddb4a4c3b438eb056828719f07608af27d163191cb3e3aa6cc5"},
+    {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ceb3b7e6a0135e092de86110c5a74e46bda4bd4fbfeeb3a3bcec79c0f861e450"},
+    {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:79660376075cfd4b2c80f295528aa6beb2058fd289f4c9252f986751a4cd0496"},
+    {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4428b29611e989719874670fd152b6625500ad6c686d464e99f5aaeeaca175a"},
+    {file = "multidict-6.0.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d84a5c3a5f7ce6db1f999fb9438f686bc2e09d38143f2d93d8406ed2dd6b9226"},
+    {file = "multidict-6.0.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:76c0de87358b192de7ea9649beb392f107dcad9ad27276324c24c91774ca5271"},
+    {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:79a6d2ba910adb2cbafc95dad936f8b9386e77c84c35bc0add315b856d7c3abb"},
+    {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:92d16a3e275e38293623ebf639c471d3e03bb20b8ebb845237e0d3664914caef"},
+    {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:fb616be3538599e797a2017cccca78e354c767165e8858ab5116813146041a24"},
+    {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:14c2976aa9038c2629efa2c148022ed5eb4cb939e15ec7aace7ca932f48f9ba6"},
+    {file = "multidict-6.0.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:435a0984199d81ca178b9ae2c26ec3d49692d20ee29bc4c11a2a8d4514c67eda"},
+    {file = "multidict-6.0.5-cp312-cp312-win32.whl", hash = "sha256:9fe7b0653ba3d9d65cbe7698cca585bf0f8c83dbbcc710db9c90f478e175f2d5"},
+    {file = "multidict-6.0.5-cp312-cp312-win_amd64.whl", hash = "sha256:01265f5e40f5a17f8241d52656ed27192be03bfa8764d88e8220141d1e4b3556"},
+    {file = "multidict-6.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:19fe01cea168585ba0f678cad6f58133db2aa14eccaf22f88e4a6dccadfad8b3"},
+    {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf7a982604375a8d49b6cc1b781c1747f243d91b81035a9b43a2126c04766f5"},
+    {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:107c0cdefe028703fb5dafe640a409cb146d44a6ae201e55b35a4af8e95457dd"},
+    {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:403c0911cd5d5791605808b942c88a8155c2592e05332d2bf78f18697a5fa15e"},
+    {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aeaf541ddbad8311a87dd695ed9642401131ea39ad7bc8cf3ef3967fd093b626"},
+    {file = "multidict-6.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e4972624066095e52b569e02b5ca97dbd7a7ddd4294bf4e7247d52635630dd83"},
+    {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d946b0a9eb8aaa590df1fe082cee553ceab173e6cb5b03239716338629c50c7a"},
+    {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b55358304d7a73d7bdf5de62494aaf70bd33015831ffd98bc498b433dfe5b10c"},
+    {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:a3145cb08d8625b2d3fee1b2d596a8766352979c9bffe5d7833e0503d0f0b5e5"},
+    {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d65f25da8e248202bd47445cec78e0025c0fe7582b23ec69c3b27a640dd7a8e3"},
+    {file = "multidict-6.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:c9bf56195c6bbd293340ea82eafd0071cb3d450c703d2c93afb89f93b8386ccc"},
+    {file = "multidict-6.0.5-cp37-cp37m-win32.whl", hash = "sha256:69db76c09796b313331bb7048229e3bee7928eb62bab5e071e9f7fcc4879caee"},
+    {file = "multidict-6.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:fce28b3c8a81b6b36dfac9feb1de115bab619b3c13905b419ec71d03a3fc1423"},
+    {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:76f067f5121dcecf0d63a67f29080b26c43c71a98b10c701b0677e4a065fbd54"},
+    {file = "multidict-6.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b82cc8ace10ab5bd93235dfaab2021c70637005e1ac787031f4d1da63d493c1d"},
+    {file = "multidict-6.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5cb241881eefd96b46f89b1a056187ea8e9ba14ab88ba632e68d7a2ecb7aadf7"},
+    {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8e94e6912639a02ce173341ff62cc1201232ab86b8a8fcc05572741a5dc7d93"},
+    {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:09a892e4a9fb47331da06948690ae38eaa2426de97b4ccbfafbdcbe5c8f37ff8"},
+    {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:55205d03e8a598cfc688c71ca8ea5f66447164efff8869517f175ea632c7cb7b"},
+    {file = "multidict-6.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:37b15024f864916b4951adb95d3a80c9431299080341ab9544ed148091b53f50"},
+    {file = "multidict-6.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2a1dee728b52b33eebff5072817176c172050d44d67befd681609b4746e1c2e"},
+    {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:edd08e6f2f1a390bf137080507e44ccc086353c8e98c657e666c017718561b89"},
+    {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:60d698e8179a42ec85172d12f50b1668254628425a6bd611aba022257cac1386"},
+    {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3d25f19500588cbc47dc19081d78131c32637c25804df8414463ec908631e453"},
+    {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4cc0ef8b962ac7a5e62b9e826bd0cd5040e7d401bc45a6835910ed699037a461"},
+    {file = "multidict-6.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:eca2e9d0cc5a889850e9bbd68e98314ada174ff6ccd1129500103df7a94a7a44"},
+    {file = "multidict-6.0.5-cp38-cp38-win32.whl", hash = "sha256:4a6a4f196f08c58c59e0b8ef8ec441d12aee4125a7d4f4fef000ccb22f8d7241"},
+    {file = "multidict-6.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:0275e35209c27a3f7951e1ce7aaf93ce0d163b28948444bec61dd7badc6d3f8c"},
+    {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e7be68734bd8c9a513f2b0cfd508802d6609da068f40dc57d4e3494cefc92929"},
+    {file = "multidict-6.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1d9ea7a7e779d7a3561aade7d596649fbecfa5c08a7674b11b423783217933f9"},
+    {file = "multidict-6.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ea1456df2a27c73ce51120fa2f519f1bea2f4a03a917f4a43c8707cf4cbbae1a"},
+    {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf590b134eb70629e350691ecca88eac3e3b8b3c86992042fb82e3cb1830d5e1"},
+    {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5c0631926c4f58e9a5ccce555ad7747d9a9f8b10619621f22f9635f069f6233e"},
+    {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:dce1c6912ab9ff5f179eaf6efe7365c1f425ed690b03341911bf4939ef2f3046"},
+    {file = "multidict-6.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0868d64af83169e4d4152ec612637a543f7a336e4a307b119e98042e852ad9c"},
+    {file = "multidict-6.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:141b43360bfd3bdd75f15ed811850763555a251e38b2405967f8e25fb43f7d40"},
+    {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7df704ca8cf4a073334e0427ae2345323613e4df18cc224f647f251e5e75a527"},
+    {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6214c5a5571802c33f80e6c84713b2c79e024995b9c5897f794b43e714daeec9"},
+    {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:cd6c8fca38178e12c00418de737aef1261576bd1b6e8c6134d3e729a4e858b38"},
+    {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:e02021f87a5b6932fa6ce916ca004c4d441509d33bbdbeca70d05dff5e9d2479"},
+    {file = "multidict-6.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ebd8d160f91a764652d3e51ce0d2956b38efe37c9231cd82cfc0bed2e40b581c"},
+    {file = "multidict-6.0.5-cp39-cp39-win32.whl", hash = "sha256:04da1bb8c8dbadf2a18a452639771951c662c5ad03aefe4884775454be322c9b"},
+    {file = "multidict-6.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:d6f6d4f185481c9669b9447bf9d9cf3b95a0e9df9d169bbc17e363b7d5487755"},
+    {file = "multidict-6.0.5-py3-none-any.whl", hash = "sha256:0d63c74e3d7ab26de115c49bffc92cc77ed23395303d496eae515d4204a625e7"},
+    {file = "multidict-6.0.5.tar.gz", hash = "sha256:f7e301075edaf50500f0b341543c41194d8df3ae5caf4702f2095f3ca73dd8da"},
 ]
 
 [[package]]
 name = "multiprocess"
-version = "0.70.15"
+version = "0.70.16"
 description = "better multiprocessing and multithreading in Python"
 optional = false
-python-versions = ">=3.7"
+python-versions = ">=3.8"
 files = [
-    {file = "multiprocess-0.70.15-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:aa36c7ed16f508091438687fe9baa393a7a8e206731d321e443745e743a0d4e5"},
-    {file = "multiprocess-0.70.15-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:20e024018c46d0d1602024c613007ac948f9754659e3853b0aa705e83f6931d8"},
-    {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_i686.whl", hash = "sha256:e576062981c91f0fe8a463c3d52506e598dfc51320a8dd8d78b987dfca91c5db"},
-    {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:e73f497e6696a0f5433ada2b3d599ae733b87a6e8b008e387c62ac9127add177"},
-    {file = "multiprocess-0.70.15-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:73db2e7b32dcc7f9b0f075c2ffa45c90b6729d3f1805f27e88534c8d321a1be5"},
-    {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_i686.whl", hash = "sha256:4271647bd8a49c28ecd6eb56a7fdbd3c212c45529ad5303b40b3c65fc6928e5f"},
-    {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:cf981fb998d6ec3208cb14f0cf2e9e80216e834f5d51fd09ebc937c32b960902"},
-    {file = "multiprocess-0.70.15-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:18f9f2c7063346d1617bd1684fdcae8d33380ae96b99427260f562e1a1228b67"},
-    {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_i686.whl", hash = "sha256:0eac53214d664c49a34695e5824872db4006b1a465edd7459a251809c3773370"},
-    {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:1a51dd34096db47fb21fa2b839e615b051d51b97af9a67afbcdaa67186b44883"},
-    {file = "multiprocess-0.70.15-py310-none-any.whl", hash = "sha256:7dd58e33235e83cf09d625e55cffd7b0f0eede7ee9223cdd666a87624f60c21a"},
-    {file = "multiprocess-0.70.15-py311-none-any.whl", hash = "sha256:134f89053d82c9ed3b73edd3a2531eb791e602d4f4156fc92a79259590bd9670"},
-    {file = "multiprocess-0.70.15-py37-none-any.whl", hash = "sha256:f7d4a1629bccb433114c3b4885f69eccc200994323c80f6feee73b0edc9199c5"},
-    {file = "multiprocess-0.70.15-py38-none-any.whl", hash = "sha256:bee9afba476c91f9ebee7beeee0601face9eff67d822e893f9a893725fbd6316"},
-    {file = "multiprocess-0.70.15-py39-none-any.whl", hash = "sha256:3e0953f5d52b4c76f1c973eaf8214554d146f2be5decb48e928e55c7a2d19338"},
-    {file = "multiprocess-0.70.15.tar.gz", hash = "sha256:f20eed3036c0ef477b07a4177cf7c1ba520d9a2677870a4f47fe026f0cd6787e"},
+    {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"},
+    {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"},
+    {file = "multiprocess-0.70.16-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37b55f71c07e2d741374998c043b9520b626a8dddc8b3129222ca4f1a06ef67a"},
+    {file = "multiprocess-0.70.16-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba8c31889abf4511c7308a8c52bb4a30b9d590e7f58523302ba00237702ca054"},
+    {file = "multiprocess-0.70.16-pp39-pypy39_pp73-macosx_10_13_x86_64.whl", hash = "sha256:0dfd078c306e08d46d7a8d06fb120313d87aa43af60d66da43ffff40b44d2f41"},
+    {file = "multiprocess-0.70.16-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e7b9d0f307cd9bd50851afaac0dba2cb6c44449efff697df7c7645f7d3f2be3a"},
+    {file = "multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02"},
+    {file = "multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a"},
+    {file = "multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e"},
+    {file = "multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435"},
+    {file = "multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3"},
+    {file = "multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1"},
 ]
 
 [package.dependencies]
-dill = ">=0.3.7"
+dill = ">=0.3.8"
+
+[[package]]
+name = "nest-asyncio"
+version = "1.6.0"
+description = "Patch asyncio to allow nested event loops"
+optional = true
+python-versions = ">=3.5"
+files = [
+    {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"},
+    {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"},
+]
 
 [[package]]
 name = "networkx"
@@ -1135,49 +1387,83 @@ doc = ["nb2plots (>=0.7)", "nbconvert (<7.9)", "numpydoc (>=1.6)", "pillow (>=9.
 extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.11)", "sympy (>=1.10)"]
 test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"]
 
+[[package]]
+name = "numba"
+version = "0.59.1"
+description = "compiling Python code using LLVM"
+optional = true
+python-versions = ">=3.9"
+files = [
+    {file = "numba-0.59.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:97385a7f12212c4f4bc28f648720a92514bee79d7063e40ef66c2d30600fd18e"},
+    {file = "numba-0.59.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0b77aecf52040de2a1eb1d7e314497b9e56fba17466c80b457b971a25bb1576d"},
+    {file = "numba-0.59.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3476a4f641bfd58f35ead42f4dcaf5f132569c4647c6f1360ccf18ee4cda3990"},
+    {file = "numba-0.59.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:525ef3f820931bdae95ee5379c670d5c97289c6520726bc6937a4a7d4230ba24"},
+    {file = "numba-0.59.1-cp310-cp310-win_amd64.whl", hash = "sha256:990e395e44d192a12105eca3083b61307db7da10e093972ca285c85bef0963d6"},
+    {file = "numba-0.59.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:43727e7ad20b3ec23ee4fc642f5b61845c71f75dd2825b3c234390c6d8d64051"},
+    {file = "numba-0.59.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:411df625372c77959570050e861981e9d196cc1da9aa62c3d6a836b5cc338966"},
+    {file = "numba-0.59.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2801003caa263d1e8497fb84829a7ecfb61738a95f62bc05693fcf1733e978e4"},
+    {file = "numba-0.59.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dd2842fac03be4e5324ebbbd4d2d0c8c0fc6e0df75c09477dd45b288a0777389"},
+    {file = "numba-0.59.1-cp311-cp311-win_amd64.whl", hash = "sha256:0594b3dfb369fada1f8bb2e3045cd6c61a564c62e50cf1f86b4666bc721b3450"},
+    {file = "numba-0.59.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1cce206a3b92836cdf26ef39d3a3242fec25e07f020cc4feec4c4a865e340569"},
+    {file = "numba-0.59.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8c8b4477763cb1fbd86a3be7050500229417bf60867c93e131fd2626edb02238"},
+    {file = "numba-0.59.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d80bce4ef7e65bf895c29e3889ca75a29ee01da80266a01d34815918e365835"},
+    {file = "numba-0.59.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f7ad1d217773e89a9845886401eaaab0a156a90aa2f179fdc125261fd1105096"},
+    {file = "numba-0.59.1-cp312-cp312-win_amd64.whl", hash = "sha256:5bf68f4d69dd3a9f26a9b23548fa23e3bcb9042e2935257b471d2a8d3c424b7f"},
+    {file = "numba-0.59.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4e0318ae729de6e5dbe64c75ead1a95eb01fabfe0e2ebed81ebf0344d32db0ae"},
+    {file = "numba-0.59.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0f68589740a8c38bb7dc1b938b55d1145244c8353078eea23895d4f82c8b9ec1"},
+    {file = "numba-0.59.1-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:649913a3758891c77c32e2d2a3bcbedf4a69f5fea276d11f9119677c45a422e8"},
+    {file = "numba-0.59.1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9712808e4545270291d76b9a264839ac878c5eb7d8b6e02c970dc0ac29bc8187"},
+    {file = "numba-0.59.1-cp39-cp39-win_amd64.whl", hash = "sha256:8d51ccd7008a83105ad6a0082b6a2b70f1142dc7cfd76deb8c5a862367eb8c86"},
+    {file = "numba-0.59.1.tar.gz", hash = "sha256:76f69132b96028d2774ed20415e8c528a34e3299a40581bae178f0994a2f370b"},
+]
+
+[package.dependencies]
+llvmlite = "==0.42.*"
+numpy = ">=1.22,<1.27"
+
 [[package]]
 name = "numpy"
-version = "1.26.2"
+version = "1.26.4"
 description = "Fundamental package for array computing in Python"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "numpy-1.26.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3703fc9258a4a122d17043e57b35e5ef1c5a5837c3db8be396c82e04c1cf9b0f"},
-    {file = "numpy-1.26.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cc392fdcbd21d4be6ae1bb4475a03ce3b025cd49a9be5345d76d7585aea69440"},
-    {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36340109af8da8805d8851ef1d74761b3b88e81a9bd80b290bbfed61bd2b4f75"},
-    {file = "numpy-1.26.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bcc008217145b3d77abd3e4d5ef586e3bdfba8fe17940769f8aa09b99e856c00"},
-    {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3ced40d4e9e18242f70dd02d739e44698df3dcb010d31f495ff00a31ef6014fe"},
-    {file = "numpy-1.26.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b272d4cecc32c9e19911891446b72e986157e6a1809b7b56518b4f3755267523"},
-    {file = "numpy-1.26.2-cp310-cp310-win32.whl", hash = "sha256:22f8fc02fdbc829e7a8c578dd8d2e15a9074b630d4da29cda483337e300e3ee9"},
-    {file = "numpy-1.26.2-cp310-cp310-win_amd64.whl", hash = "sha256:26c9d33f8e8b846d5a65dd068c14e04018d05533b348d9eaeef6c1bd787f9919"},
-    {file = "numpy-1.26.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b96e7b9c624ef3ae2ae0e04fa9b460f6b9f17ad8b4bec6d7756510f1f6c0c841"},
-    {file = "numpy-1.26.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:aa18428111fb9a591d7a9cc1b48150097ba6a7e8299fb56bdf574df650e7d1f1"},
-    {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06fa1ed84aa60ea6ef9f91ba57b5ed963c3729534e6e54055fc151fad0423f0a"},
-    {file = "numpy-1.26.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:96ca5482c3dbdd051bcd1fce8034603d6ebfc125a7bd59f55b40d8f5d246832b"},
-    {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:854ab91a2906ef29dc3925a064fcd365c7b4da743f84b123002f6139bcb3f8a7"},
-    {file = "numpy-1.26.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:f43740ab089277d403aa07567be138fc2a89d4d9892d113b76153e0e412409f8"},
-    {file = "numpy-1.26.2-cp311-cp311-win32.whl", hash = "sha256:a2bbc29fcb1771cd7b7425f98b05307776a6baf43035d3b80c4b0f29e9545186"},
-    {file = "numpy-1.26.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b3fca8a5b00184828d12b073af4d0fc5fdd94b1632c2477526f6bd7842d700d"},
-    {file = "numpy-1.26.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a4cd6ed4a339c21f1d1b0fdf13426cb3b284555c27ac2f156dfdaaa7e16bfab0"},
-    {file = "numpy-1.26.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5d5244aabd6ed7f312268b9247be47343a654ebea52a60f002dc70c769048e75"},
-    {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a3cdb4d9c70e6b8c0814239ead47da00934666f668426fc6e94cce869e13fd7"},
-    {file = "numpy-1.26.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa317b2325f7aa0a9471663e6093c210cb2ae9c0ad824732b307d2c51983d5b6"},
-    {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:174a8880739c16c925799c018f3f55b8130c1f7c8e75ab0a6fa9d41cab092fd6"},
-    {file = "numpy-1.26.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f79b231bf5c16b1f39c7f4875e1ded36abee1591e98742b05d8a0fb55d8a3eec"},
-    {file = "numpy-1.26.2-cp312-cp312-win32.whl", hash = "sha256:4a06263321dfd3598cacb252f51e521a8cb4b6df471bb12a7ee5cbab20ea9167"},
-    {file = "numpy-1.26.2-cp312-cp312-win_amd64.whl", hash = "sha256:b04f5dc6b3efdaab541f7857351aac359e6ae3c126e2edb376929bd3b7f92d7e"},
-    {file = "numpy-1.26.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4eb8df4bf8d3d90d091e0146f6c28492b0be84da3e409ebef54349f71ed271ef"},
-    {file = "numpy-1.26.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1a13860fdcd95de7cf58bd6f8bc5a5ef81c0b0625eb2c9a783948847abbef2c2"},
-    {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64308ebc366a8ed63fd0bf426b6a9468060962f1a4339ab1074c228fa6ade8e3"},
-    {file = "numpy-1.26.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baf8aab04a2c0e859da118f0b38617e5ee65d75b83795055fb66c0d5e9e9b818"},
-    {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d73a3abcac238250091b11caef9ad12413dab01669511779bc9b29261dd50210"},
-    {file = "numpy-1.26.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b361d369fc7e5e1714cf827b731ca32bff8d411212fccd29ad98ad622449cc36"},
-    {file = "numpy-1.26.2-cp39-cp39-win32.whl", hash = "sha256:bd3f0091e845164a20bd5a326860c840fe2af79fa12e0469a12768a3ec578d80"},
-    {file = "numpy-1.26.2-cp39-cp39-win_amd64.whl", hash = "sha256:2beef57fb031dcc0dc8fa4fe297a742027b954949cabb52a2a376c144e5e6060"},
-    {file = "numpy-1.26.2-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:1cc3d5029a30fb5f06704ad6b23b35e11309491c999838c31f124fee32107c79"},
-    {file = "numpy-1.26.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94cc3c222bb9fb5a12e334d0479b97bb2df446fbe622b470928f5284ffca3f8d"},
-    {file = "numpy-1.26.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe6b44fb8fcdf7eda4ef4461b97b3f63c466b27ab151bec2366db8b197387841"},
-    {file = "numpy-1.26.2.tar.gz", hash = "sha256:f65738447676ab5777f11e6bbbdb8ce11b785e105f690bc45966574816b6d3ea"},
+    {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
+    {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
+    {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d209d8969599b27ad20994c8e41936ee0964e6da07478d6c35016bc386b66ad4"},
+    {file = "numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffa75af20b44f8dba823498024771d5ac50620e6915abac414251bd971b4529f"},
+    {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:62b8e4b1e28009ef2846b4c7852046736bab361f7aeadeb6a5b89ebec3c7055a"},
+    {file = "numpy-1.26.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a4abb4f9001ad2858e7ac189089c42178fcce737e4169dc61321660f1a96c7d2"},
+    {file = "numpy-1.26.4-cp310-cp310-win32.whl", hash = "sha256:bfe25acf8b437eb2a8b2d49d443800a5f18508cd811fea3181723922a8a82b07"},
+    {file = "numpy-1.26.4-cp310-cp310-win_amd64.whl", hash = "sha256:b97fe8060236edf3662adfc2c633f56a08ae30560c56310562cb4f95500022d5"},
+    {file = "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c66707fabe114439db9068ee468c26bbdf909cac0fb58686a42a24de1760c71"},
+    {file = "numpy-1.26.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:edd8b5fe47dab091176d21bb6de568acdd906d1887a4584a15a9a96a1dca06ef"},
+    {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7ab55401287bfec946ced39700c053796e7cc0e3acbef09993a9ad2adba6ca6e"},
+    {file = "numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:666dbfb6ec68962c033a450943ded891bed2d54e6755e35e5835d63f4f6931d5"},
+    {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:96ff0b2ad353d8f990b63294c8986f1ec3cb19d749234014f4e7eb0112ceba5a"},
+    {file = "numpy-1.26.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:60dedbb91afcbfdc9bc0b1f3f402804070deed7392c23eb7a7f07fa857868e8a"},
+    {file = "numpy-1.26.4-cp311-cp311-win32.whl", hash = "sha256:1af303d6b2210eb850fcf03064d364652b7120803a0b872f5211f5234b399f20"},
+    {file = "numpy-1.26.4-cp311-cp311-win_amd64.whl", hash = "sha256:cd25bcecc4974d09257ffcd1f098ee778f7834c3ad767fe5db785be9a4aa9cb2"},
+    {file = "numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b3ce300f3644fb06443ee2222c2201dd3a89ea6040541412b8fa189341847218"},
+    {file = "numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:03a8c78d01d9781b28a6989f6fa1bb2c4f2d51201cf99d3dd875df6fbd96b23b"},
+    {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9fad7dcb1aac3c7f0584a5a8133e3a43eeb2fe127f47e3632d43d677c66c102b"},
+    {file = "numpy-1.26.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:675d61ffbfa78604709862923189bad94014bef562cc35cf61d3a07bba02a7ed"},
+    {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ab47dbe5cc8210f55aa58e4805fe224dac469cde56b9f731a4c098b91917159a"},
+    {file = "numpy-1.26.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1dda2e7b4ec9dd512f84935c5f126c8bd8b9f2fc001e9f54af255e8c5f16b0e0"},
+    {file = "numpy-1.26.4-cp312-cp312-win32.whl", hash = "sha256:50193e430acfc1346175fcbdaa28ffec49947a06918b7b92130744e81e640110"},
+    {file = "numpy-1.26.4-cp312-cp312-win_amd64.whl", hash = "sha256:08beddf13648eb95f8d867350f6a018a4be2e5ad54c8d8caed89ebca558b2818"},
+    {file = "numpy-1.26.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7349ab0fa0c429c82442a27a9673fc802ffdb7c7775fad780226cb234965e53c"},
+    {file = "numpy-1.26.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:52b8b60467cd7dd1e9ed082188b4e6bb35aa5cdd01777621a1658910745b90be"},
+    {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d5241e0a80d808d70546c697135da2c613f30e28251ff8307eb72ba696945764"},
+    {file = "numpy-1.26.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f870204a840a60da0b12273ef34f7051e98c3b5961b61b0c2c1be6dfd64fbcd3"},
+    {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:679b0076f67ecc0138fd2ede3a8fd196dddc2ad3254069bcb9faf9a79b1cebcd"},
+    {file = "numpy-1.26.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47711010ad8555514b434df65f7d7b076bb8261df1ca9bb78f53d3b2db02e95c"},
+    {file = "numpy-1.26.4-cp39-cp39-win32.whl", hash = "sha256:a354325ee03388678242a4d7ebcd08b5c727033fcff3b2f536aea978e15ee9e6"},
+    {file = "numpy-1.26.4-cp39-cp39-win_amd64.whl", hash = "sha256:3373d5d70a5fe74a2c1bb6d2cfd9609ecf686d47a2d7b1d37a8f3b6bf6003aea"},
+    {file = "numpy-1.26.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:afedb719a9dcfc7eaf2287b839d8198e06dcd4cb5d276a3df279231138e83d30"},
+    {file = "numpy-1.26.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95a7476c59002f2f6c590b9b7b998306fba6a5aa646b1e22ddfeaf8f78c3a29c"},
+    {file = "numpy-1.26.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7e50d0a0cc3189f9cb0aeb3a6a6af18c16f59f004b866cd2be1c14b36134a4a0"},
+    {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
 ]
 
 [[package]]
@@ -1291,23 +1577,24 @@ nvidia-nvjitlink-cu12 = "*"
 
 [[package]]
 name = "nvidia-nccl-cu12"
-version = "2.18.1"
+version = "2.20.5"
 description = "NVIDIA Collective Communication Library (NCCL) Runtime"
 optional = false
 python-versions = ">=3"
 files = [
-    {file = "nvidia_nccl_cu12-2.18.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:1a6c4acefcbebfa6de320f412bf7866de856e786e0462326ba1bac40de0b5e71"},
+    {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"},
+    {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"},
 ]
 
 [[package]]
 name = "nvidia-nvjitlink-cu12"
-version = "12.3.101"
+version = "12.4.127"
 description = "Nvidia JIT LTO Library"
 optional = false
 python-versions = ">=3"
 files = [
-    {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-manylinux1_x86_64.whl", hash = "sha256:64335a8088e2b9d196ae8665430bc6a2b7e6ef2eb877a9c735c804bd4ff6467c"},
-    {file = "nvidia_nvjitlink_cu12-12.3.101-py3-none-win_amd64.whl", hash = "sha256:1b2e317e437433753530792f13eece58f0aec21a2b05903be7bffe58a606cbd1"},
+    {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57"},
+    {file = "nvidia_nvjitlink_cu12-12.4.127-py3-none-win_amd64.whl", hash = "sha256:fd9020c501d27d135f983c6d3e244b197a7ccad769e34df53a42e276b0e25fa1"},
 ]
 
 [[package]]
@@ -1477,13 +1764,13 @@ files = [
 
 [[package]]
 name = "optimum"
-version = "1.17.1"
+version = "1.19.1"
 description = "Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to integrate third-party libraries from Hardware Partners and interface with their specific functionality."
 optional = false
 python-versions = ">=3.7.0"
 files = [
-    {file = "optimum-1.17.1-py3-none-any.whl", hash = "sha256:508bc55db3c9434f4e8d5a30c39a46ac63c4cdb45bcc5a641b6c1c77cae88d23"},
-    {file = "optimum-1.17.1.tar.gz", hash = "sha256:e59af717e8691b11903fe2cfb8c6efd6f6798b0417f3e70d231e578a02448ceb"},
+    {file = "optimum-1.19.1-py3-none-any.whl", hash = "sha256:ca474589682fe10f7827c85260d116603a5823d5c251c453620584cbc06fa5f6"},
+    {file = "optimum-1.19.1.tar.gz", hash = "sha256:fd723b723bb7fe57d98b4afbd5f9e1d923d31a9843eff32331725162994ec849"},
 ]
 
 [package.dependencies]
@@ -1494,40 +1781,40 @@ numpy = "*"
 packaging = "*"
 sympy = "*"
 torch = ">=1.11"
-transformers = {version = ">=4.26.0", extras = ["sentencepiece"]}
+transformers = {version = ">=4.26.0,<4.41.0", extras = ["sentencepiece"]}
 
 [package.extras]
 amd = ["optimum-amd"]
 benchmark = ["evaluate (>=0.2.0)", "optuna", "scikit-learn", "seqeval", "torchvision", "tqdm"]
-dev = ["Pillow", "accelerate", "black (>=23.1,<24.0)", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest", "pytest-xdist", "requests", "rjieba", "ruff (==0.1.5)", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"]
+dev = ["Pillow", "accelerate", "black (>=23.1,<24.0)", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest (<=8.0.0)", "pytest-xdist", "requests", "rjieba", "ruff (==0.1.5)", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"]
 diffusers = ["diffusers"]
 doc-build = ["accelerate"]
 exporters = ["onnx", "onnxruntime", "timm"]
 exporters-gpu = ["onnx", "onnxruntime-gpu", "timm"]
-exporters-tf = ["h5py", "numpy (<1.24.0)", "onnx", "onnxruntime", "tensorflow (>=2.4,<=2.12.1)", "tf2onnx", "timm"]
+exporters-tf = ["h5py", "numpy (<1.24.0)", "onnx", "onnxruntime", "tensorflow (>=2.4,<=2.12.1)", "tf2onnx", "timm", "transformers[sentencepiece] (>=4.26.0,<4.38.0)"]
 furiosa = ["optimum-furiosa"]
 graphcore = ["optimum-graphcore"]
 habana = ["optimum-habana", "transformers (>=4.37.0,<4.38.0)"]
 intel = ["optimum-intel (>=1.15.0)"]
 neural-compressor = ["optimum-intel[neural-compressor] (>=1.15.0)"]
-neuron = ["optimum-neuron[neuron]"]
-neuronx = ["optimum-neuron[neuronx]"]
+neuron = ["optimum-neuron[neuron] (>=0.0.20)", "transformers (==4.36.2)"]
+neuronx = ["optimum-neuron[neuronx] (>=0.0.20)", "transformers (==4.36.2)"]
 nncf = ["optimum-intel[nncf] (>=1.15.0)"]
 onnxruntime = ["datasets (>=1.2.1)", "evaluate", "onnx", "onnxruntime (>=1.11.0)", "protobuf (>=3.20.1)"]
 onnxruntime-gpu = ["accelerate", "datasets (>=1.2.1)", "evaluate", "onnx", "onnxruntime-gpu (>=1.11.0)", "protobuf (>=3.20.1)"]
 openvino = ["optimum-intel[openvino] (>=1.15.0)"]
 quality = ["black (>=23.1,<24.0)", "ruff (==0.1.5)"]
-tests = ["Pillow", "accelerate", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest", "pytest-xdist", "requests", "rjieba", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"]
+tests = ["Pillow", "accelerate", "diffusers (>=0.17.0)", "einops", "invisible-watermark", "parameterized", "pytest (<=8.0.0)", "pytest-xdist", "requests", "rjieba", "sacremoses", "scikit-learn", "timm", "torchaudio", "torchvision"]
 
 [[package]]
 name = "optimum-habana"
-version = "1.10.4"
+version = "1.11.0"
 description = "Optimum Habana is the interface between the Hugging Face Transformers and Diffusers libraries and Habana's Gaudi processor (HPU). It provides a set of tools enabling easy model loading, training and inference on single- and multi-HPU settings for different downstream tasks."
 optional = false
 python-versions = "*"
 files = [
-    {file = "optimum-habana-1.10.4.tar.gz", hash = "sha256:f1165bbbb834806d66c9beefb307bdc0497035ba5ed7575c5004bbbf1b2216e3"},
-    {file = "optimum_habana-1.10.4-py3-none-any.whl", hash = "sha256:ef1fe4b2df975b052a75a280925dfb1fba29cdaf37aaca576c6056605866f5e3"},
+    {file = "optimum-habana-1.11.0.tar.gz", hash = "sha256:d7c7caea5daf17c0d520c52797d8a663432de6aa9ee8b35f6304d1646f225826"},
+    {file = "optimum_habana-1.11.0-py3-none-any.whl", hash = "sha256:7e94b40d339ac42f754d6e0c9f7df513ebd1afd63307ec2f05e677adfbe347c4"},
 ]
 
 [package.dependencies]
@@ -1535,194 +1822,253 @@ accelerate = "<0.28.0"
 diffusers = ">=0.26.0,<0.27.0"
 optimum = "*"
 torch = "*"
-transformers = ">=4.37.0,<4.38.0"
+transformers = ">=4.38.0,<4.39.0"
 
 [package.extras]
 quality = ["hf-doc-builder", "ruff"]
-tests = ["GitPython", "datasets", "optuna", "parameterized", "psutil", "pytest", "safetensors", "sentencepiece"]
+tests = ["GitPython", "datasets", "optuna", "parameterized", "psutil", "pytest (<8.0.0)", "safetensors", "sentencepiece"]
+
+[[package]]
+name = "outlines"
+version = "0.0.36"
+description = "Probabilistic Generative Model Programming"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "outlines-0.0.36-py3-none-any.whl", hash = "sha256:afa02ca5c449c47731fa06af66d13c2f5ee8b30f8b82b4db90e08215d6f111d1"},
+    {file = "outlines-0.0.36.tar.gz", hash = "sha256:3cffb43143548cd78c6061990feb461cffd5479999391b8390471ea839c2d46e"},
+]
+
+[package.dependencies]
+cloudpickle = "*"
+diskcache = "*"
+interegular = "*"
+jinja2 = "*"
+joblib = "*"
+jsonschema = "*"
+lark = "*"
+nest-asyncio = "*"
+numba = "*"
+numpy = "*"
+pydantic = ">=2.0"
+referencing = "*"
+requests = "*"
+scipy = "*"
+torch = ">=2.1.0"
+transformers = "*"
+
+[package.extras]
+serve = ["fastapi", "pydantic (>=2.0)", "ray (==2.9.0)", "uvicorn", "vllm (>=0.3.0)"]
+test = ["accelerate", "beartype (<0.16.0)", "coverage[toml] (>=5.1)", "datasets", "diff-cover", "huggingface-hub", "llama-cpp-python", "openai (>=1.0.0)", "pre-commit", "pytest", "pytest-benchmark", "pytest-cov", "pytest-mock", "responses", "transformers"]
 
 [[package]]
 name = "packaging"
-version = "23.2"
+version = "24.0"
 description = "Core utilities for Python packages"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "packaging-23.2-py3-none-any.whl", hash = "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7"},
-    {file = "packaging-23.2.tar.gz", hash = "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5"},
+    {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"},
+    {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"},
 ]
 
 [[package]]
 name = "pandas"
-version = "2.1.3"
+version = "2.2.2"
 description = "Powerful data structures for data analysis, time series, and statistics"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "pandas-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:acf08a73b5022b479c1be155d4988b72f3020f308f7a87c527702c5f8966d34f"},
-    {file = "pandas-2.1.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3cc4469ff0cf9aa3a005870cb49ab8969942b7156e0a46cc3f5abd6b11051dfb"},
-    {file = "pandas-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35172bff95f598cc5866c047f43c7f4df2c893acd8e10e6653a4b792ed7f19bb"},
-    {file = "pandas-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:59dfe0e65a2f3988e940224e2a70932edc964df79f3356e5f2997c7d63e758b4"},
-    {file = "pandas-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0296a66200dee556850d99b24c54c7dfa53a3264b1ca6f440e42bad424caea03"},
-    {file = "pandas-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:465571472267a2d6e00657900afadbe6097c8e1dc43746917db4dfc862e8863e"},
-    {file = "pandas-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:04d4c58e1f112a74689da707be31cf689db086949c71828ef5da86727cfe3f82"},
-    {file = "pandas-2.1.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7fa2ad4ff196768ae63a33f8062e6838efed3a319cf938fdf8b95e956c813042"},
-    {file = "pandas-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4441ac94a2a2613e3982e502ccec3bdedefe871e8cea54b8775992485c5660ef"},
-    {file = "pandas-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5ded6ff28abbf0ea7689f251754d3789e1edb0c4d0d91028f0b980598418a58"},
-    {file = "pandas-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fca5680368a5139d4920ae3dc993eb5106d49f814ff24018b64d8850a52c6ed2"},
-    {file = "pandas-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:de21e12bf1511190fc1e9ebc067f14ca09fccfb189a813b38d63211d54832f5f"},
-    {file = "pandas-2.1.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a5d53c725832e5f1645e7674989f4c106e4b7249c1d57549023ed5462d73b140"},
-    {file = "pandas-2.1.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7cf4cf26042476e39394f1f86868d25b265ff787c9b2f0d367280f11afbdee6d"},
-    {file = "pandas-2.1.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:72c84ec1b1d8e5efcbff5312abe92bfb9d5b558f11e0cf077f5496c4f4a3c99e"},
-    {file = "pandas-2.1.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f539e113739a3e0cc15176bf1231a553db0239bfa47a2c870283fd93ba4f683"},
-    {file = "pandas-2.1.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc77309da3b55732059e484a1efc0897f6149183c522390772d3561f9bf96c00"},
-    {file = "pandas-2.1.3-cp312-cp312-win_amd64.whl", hash = "sha256:08637041279b8981a062899da0ef47828df52a1838204d2b3761fbd3e9fcb549"},
-    {file = "pandas-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b99c4e51ef2ed98f69099c72c75ec904dd610eb41a32847c4fcbc1a975f2d2b8"},
-    {file = "pandas-2.1.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f7ea8ae8004de0381a2376662c0505bb0a4f679f4c61fbfd122aa3d1b0e5f09d"},
-    {file = "pandas-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcd76d67ca2d48f56e2db45833cf9d58f548f97f61eecd3fdc74268417632b8a"},
-    {file = "pandas-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1329dbe93a880a3d7893149979caa82d6ba64a25e471682637f846d9dbc10dd2"},
-    {file = "pandas-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:321ecdb117bf0f16c339cc6d5c9a06063854f12d4d9bc422a84bb2ed3207380a"},
-    {file = "pandas-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:11a771450f36cebf2a4c9dbd3a19dfa8c46c4b905a3ea09dc8e556626060fe71"},
-    {file = "pandas-2.1.3.tar.gz", hash = "sha256:22929f84bca106921917eb73c1521317ddd0a4c71b395bcf767a106e3494209f"},
+    {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"},
+    {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"},
+    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"},
+    {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"},
+    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"},
+    {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"},
+    {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"},
+    {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"},
+    {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"},
+    {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"},
+    {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"},
+    {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"},
+    {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"},
+    {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"},
+    {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"},
+    {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"},
+    {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"},
+    {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"},
+    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"},
+    {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"},
+    {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"},
+    {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"},
+    {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"},
+    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"},
+    {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"},
+    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"},
+    {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"},
+    {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"},
+    {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"},
 ]
 
 [package.dependencies]
 numpy = [
-    {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""},
-    {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""},
-    {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""},
+    {version = ">=1.22.4", markers = "python_version < \"3.11\""},
+    {version = ">=1.23.2", markers = "python_version == \"3.11\""},
+    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
-tzdata = ">=2022.1"
+tzdata = ">=2022.7"
 
 [package.extras]
-all = ["PyQt5 (>=5.15.6)", "SQLAlchemy (>=1.4.36)", "beautifulsoup4 (>=4.11.1)", "bottleneck (>=1.3.4)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=0.8.1)", "fsspec (>=2022.05.0)", "gcsfs (>=2022.05.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.8.0)", "matplotlib (>=3.6.1)", "numba (>=0.55.2)", "numexpr (>=2.8.0)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pandas-gbq (>=0.17.5)", "psycopg2 (>=2.9.3)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.5)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "pyxlsb (>=1.0.9)", "qtpy (>=2.2.0)", "s3fs (>=2022.05.0)", "scipy (>=1.8.1)", "tables (>=3.7.0)", "tabulate (>=0.8.10)", "xarray (>=2022.03.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)", "zstandard (>=0.17.0)"]
-aws = ["s3fs (>=2022.05.0)"]
-clipboard = ["PyQt5 (>=5.15.6)", "qtpy (>=2.2.0)"]
-compression = ["zstandard (>=0.17.0)"]
-computation = ["scipy (>=1.8.1)", "xarray (>=2022.03.0)"]
+all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"]
+aws = ["s3fs (>=2022.11.0)"]
+clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"]
+compression = ["zstandard (>=0.19.0)"]
+computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"]
 consortium-standard = ["dataframe-api-compat (>=0.1.7)"]
-excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.10)", "pyxlsb (>=1.0.9)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.3)"]
-feather = ["pyarrow (>=7.0.0)"]
-fss = ["fsspec (>=2022.05.0)"]
-gcp = ["gcsfs (>=2022.05.0)", "pandas-gbq (>=0.17.5)"]
-hdf5 = ["tables (>=3.7.0)"]
-html = ["beautifulsoup4 (>=4.11.1)", "html5lib (>=1.1)", "lxml (>=4.8.0)"]
-mysql = ["SQLAlchemy (>=1.4.36)", "pymysql (>=1.0.2)"]
-output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.8.10)"]
-parquet = ["pyarrow (>=7.0.0)"]
-performance = ["bottleneck (>=1.3.4)", "numba (>=0.55.2)", "numexpr (>=2.8.0)"]
-plot = ["matplotlib (>=3.6.1)"]
-postgresql = ["SQLAlchemy (>=1.4.36)", "psycopg2 (>=2.9.3)"]
-spss = ["pyreadstat (>=1.1.5)"]
-sql-other = ["SQLAlchemy (>=1.4.36)"]
+excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"]
+feather = ["pyarrow (>=10.0.1)"]
+fss = ["fsspec (>=2022.11.0)"]
+gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"]
+hdf5 = ["tables (>=3.8.0)"]
+html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"]
+mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"]
+output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"]
+parquet = ["pyarrow (>=10.0.1)"]
+performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"]
+plot = ["matplotlib (>=3.6.3)"]
+postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"]
+pyarrow = ["pyarrow (>=10.0.1)"]
+spss = ["pyreadstat (>=1.2.0)"]
+sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"]
 test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"]
-xml = ["lxml (>=4.8.0)"]
+xml = ["lxml (>=4.9.2)"]
 
 [[package]]
 name = "peft"
-version = "0.4.0"
+version = "0.9.0"
 description = "Parameter-Efficient Fine-Tuning (PEFT)"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "peft-0.4.0-py3-none-any.whl", hash = "sha256:2cf992772a6d703814477e0bdcdadd68cb8ea388111ce2d793dd2ff0e438f357"},
-    {file = "peft-0.4.0.tar.gz", hash = "sha256:e768fa22d6e9f32aa7e891f0d06f355960278ca4dc0cdd96bff71f6f06269207"},
+    {file = "peft-0.9.0-py3-none-any.whl", hash = "sha256:d14223fee6050c53593733e8f763d94c13577e1220987f59ae473d988f2ccd91"},
+    {file = "peft-0.9.0.tar.gz", hash = "sha256:3b8d09dff94d1bfa72e064cb26af5952fd82428e2bcce432cfaf091f5035b04b"},
 ]
 
 [package.dependencies]
-accelerate = "*"
+accelerate = ">=0.21.0"
+huggingface-hub = ">=0.17.0"
 numpy = ">=1.17"
 packaging = ">=20.0"
 psutil = "*"
 pyyaml = "*"
 safetensors = "*"
 torch = ">=1.13.0"
+tqdm = "*"
 transformers = "*"
 
 [package.extras]
-dev = ["black (>=22.0,<23.0)", "hf-doc-builder", "ruff (>=0.0.241)", "urllib3 (<=2.0.0)"]
-docs-specific = ["hf-doc-builder"]
-quality = ["black (>=22.0,<23.0)", "ruff (>=0.0.241)", "urllib3 (<=2.0.0)"]
-test = ["black (>=22.0,<23.0)", "datasets", "diffusers", "hf-doc-builder", "parameterized", "pytest", "pytest-cov", "pytest-xdist", "ruff (>=0.0.241)", "urllib3 (<=2.0.0)"]
+dev = ["black", "hf-doc-builder", "ruff (>=0.2.1,<0.3.0)"]
+docs-specific = ["black", "hf-doc-builder"]
+quality = ["black", "hf-doc-builder", "ruff (>=0.2.1,<0.3.0)"]
+test = ["black", "datasets", "diffusers (<0.21.0)", "hf-doc-builder", "parameterized", "pytest", "pytest-cov", "pytest-xdist", "ruff (>=0.2.1,<0.3.0)", "scipy"]
 
 [[package]]
 name = "pillow"
-version = "10.1.0"
+version = "10.3.0"
 description = "Python Imaging Library (Fork)"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "Pillow-10.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1ab05f3db77e98f93964697c8efc49c7954b08dd61cff526b7f2531a22410106"},
-    {file = "Pillow-10.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6932a7652464746fcb484f7fc3618e6503d2066d853f68a4bd97193a3996e273"},
-    {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f63b5a68daedc54c7c3464508d8c12075e56dcfbd42f8c1bf40169061ae666"},
-    {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0949b55eb607898e28eaccb525ab104b2d86542a85c74baf3a6dc24002edec2"},
-    {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ae88931f93214777c7a3aa0a8f92a683f83ecde27f65a45f95f22d289a69e593"},
-    {file = "Pillow-10.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b0eb01ca85b2361b09480784a7931fc648ed8b7836f01fb9241141b968feb1db"},
-    {file = "Pillow-10.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d27b5997bdd2eb9fb199982bb7eb6164db0426904020dc38c10203187ae2ff2f"},
-    {file = "Pillow-10.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7df5608bc38bd37ef585ae9c38c9cd46d7c81498f086915b0f97255ea60c2818"},
-    {file = "Pillow-10.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:41f67248d92a5e0a2076d3517d8d4b1e41a97e2df10eb8f93106c89107f38b57"},
-    {file = "Pillow-10.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1fb29c07478e6c06a46b867e43b0bcdb241b44cc52be9bc25ce5944eed4648e7"},
-    {file = "Pillow-10.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2cdc65a46e74514ce742c2013cd4a2d12e8553e3a2563c64879f7c7e4d28bce7"},
-    {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50d08cd0a2ecd2a8657bd3d82c71efd5a58edb04d9308185d66c3a5a5bed9610"},
-    {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062a1610e3bc258bff2328ec43f34244fcec972ee0717200cb1425214fe5b839"},
-    {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:61f1a9d247317fa08a308daaa8ee7b3f760ab1809ca2da14ecc88ae4257d6172"},
-    {file = "Pillow-10.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a646e48de237d860c36e0db37ecaecaa3619e6f3e9d5319e527ccbc8151df061"},
-    {file = "Pillow-10.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:47e5bf85b80abc03be7455c95b6d6e4896a62f6541c1f2ce77a7d2bb832af262"},
-    {file = "Pillow-10.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:a92386125e9ee90381c3369f57a2a50fa9e6aa8b1cf1d9c4b200d41a7dd8e992"},
-    {file = "Pillow-10.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0f7c276c05a9767e877a0b4c5050c8bee6a6d960d7f0c11ebda6b99746068c2a"},
-    {file = "Pillow-10.1.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:a89b8312d51715b510a4fe9fc13686283f376cfd5abca8cd1c65e4c76e21081b"},
-    {file = "Pillow-10.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:00f438bb841382b15d7deb9a05cc946ee0f2c352653c7aa659e75e592f6fa17d"},
-    {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d929a19f5469b3f4df33a3df2983db070ebb2088a1e145e18facbc28cae5b27"},
-    {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9a92109192b360634a4489c0c756364c0c3a2992906752165ecb50544c251312"},
-    {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:0248f86b3ea061e67817c47ecbe82c23f9dd5d5226200eb9090b3873d3ca32de"},
-    {file = "Pillow-10.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9882a7451c680c12f232a422730f986a1fcd808da0fd428f08b671237237d651"},
-    {file = "Pillow-10.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1c3ac5423c8c1da5928aa12c6e258921956757d976405e9467c5f39d1d577a4b"},
-    {file = "Pillow-10.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:806abdd8249ba3953c33742506fe414880bad78ac25cc9a9b1c6ae97bedd573f"},
-    {file = "Pillow-10.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:eaed6977fa73408b7b8a24e8b14e59e1668cfc0f4c40193ea7ced8e210adf996"},
-    {file = "Pillow-10.1.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:fe1e26e1ffc38be097f0ba1d0d07fcade2bcfd1d023cda5b29935ae8052bd793"},
-    {file = "Pillow-10.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7a7e3daa202beb61821c06d2517428e8e7c1aab08943e92ec9e5755c2fc9ba5e"},
-    {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24fadc71218ad2b8ffe437b54876c9382b4a29e030a05a9879f615091f42ffc2"},
-    {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa1d323703cfdac2036af05191b969b910d8f115cf53093125e4058f62012c9a"},
-    {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:912e3812a1dbbc834da2b32299b124b5ddcb664ed354916fd1ed6f193f0e2d01"},
-    {file = "Pillow-10.1.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:7dbaa3c7de82ef37e7708521be41db5565004258ca76945ad74a8e998c30af8d"},
-    {file = "Pillow-10.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9d7bc666bd8c5a4225e7ac71f2f9d12466ec555e89092728ea0f5c0c2422ea80"},
-    {file = "Pillow-10.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:baada14941c83079bf84c037e2d8b7506ce201e92e3d2fa0d1303507a8538212"},
-    {file = "Pillow-10.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:2ef6721c97894a7aa77723740a09547197533146fba8355e86d6d9a4a1056b14"},
-    {file = "Pillow-10.1.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0a026c188be3b443916179f5d04548092e253beb0c3e2ee0a4e2cdad72f66099"},
-    {file = "Pillow-10.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:04f6f6149f266a100374ca3cc368b67fb27c4af9f1cc8cb6306d849dcdf12616"},
-    {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb40c011447712d2e19cc261c82655f75f32cb724788df315ed992a4d65696bb"},
-    {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1a8413794b4ad9719346cd9306118450b7b00d9a15846451549314a58ac42219"},
-    {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c9aeea7b63edb7884b031a35305629a7593272b54f429a9869a4f63a1bf04c34"},
-    {file = "Pillow-10.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b4005fee46ed9be0b8fb42be0c20e79411533d1fd58edabebc0dd24626882cfd"},
-    {file = "Pillow-10.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4d0152565c6aa6ebbfb1e5d8624140a440f2b99bf7afaafbdbf6430426497f28"},
-    {file = "Pillow-10.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d921bc90b1defa55c9917ca6b6b71430e4286fc9e44c55ead78ca1a9f9eba5f2"},
-    {file = "Pillow-10.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:cfe96560c6ce2f4c07d6647af2d0f3c54cc33289894ebd88cfbb3bcd5391e256"},
-    {file = "Pillow-10.1.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:937bdc5a7f5343d1c97dc98149a0be7eb9704e937fe3dc7140e229ae4fc572a7"},
-    {file = "Pillow-10.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1c25762197144e211efb5f4e8ad656f36c8d214d390585d1d21281f46d556ba"},
-    {file = "Pillow-10.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:afc8eef765d948543a4775f00b7b8c079b3321d6b675dde0d02afa2ee23000b4"},
-    {file = "Pillow-10.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:883f216eac8712b83a63f41b76ddfb7b2afab1b74abbb413c5df6680f071a6b9"},
-    {file = "Pillow-10.1.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b920e4d028f6442bea9a75b7491c063f0b9a3972520731ed26c83e254302eb1e"},
-    {file = "Pillow-10.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c41d960babf951e01a49c9746f92c5a7e0d939d1652d7ba30f6b3090f27e412"},
-    {file = "Pillow-10.1.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1fafabe50a6977ac70dfe829b2d5735fd54e190ab55259ec8aea4aaea412fa0b"},
-    {file = "Pillow-10.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:3b834f4b16173e5b92ab6566f0473bfb09f939ba14b23b8da1f54fa63e4b623f"},
-    {file = "Pillow-10.1.0.tar.gz", hash = "sha256:e6bf8de6c36ed96c86ea3b6e1d5273c53f46ef518a062464cd7ef5dd2cf92e38"},
+    {file = "pillow-10.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:90b9e29824800e90c84e4022dd5cc16eb2d9605ee13f05d47641eb183cd73d45"},
+    {file = "pillow-10.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a2c405445c79c3f5a124573a051062300936b0281fee57637e706453e452746c"},
+    {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78618cdbccaa74d3f88d0ad6cb8ac3007f1a6fa5c6f19af64b55ca170bfa1edf"},
+    {file = "pillow-10.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261ddb7ca91fcf71757979534fb4c128448b5b4c55cb6152d280312062f69599"},
+    {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:ce49c67f4ea0609933d01c0731b34b8695a7a748d6c8d186f95e7d085d2fe475"},
+    {file = "pillow-10.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b14f16f94cbc61215115b9b1236f9c18403c15dd3c52cf629072afa9d54c1cbf"},
+    {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d33891be6df59d93df4d846640f0e46f1a807339f09e79a8040bc887bdcd7ed3"},
+    {file = "pillow-10.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b50811d664d392f02f7761621303eba9d1b056fb1868c8cdf4231279645c25f5"},
+    {file = "pillow-10.3.0-cp310-cp310-win32.whl", hash = "sha256:ca2870d5d10d8726a27396d3ca4cf7976cec0f3cb706debe88e3a5bd4610f7d2"},
+    {file = "pillow-10.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:f0d0591a0aeaefdaf9a5e545e7485f89910c977087e7de2b6c388aec32011e9f"},
+    {file = "pillow-10.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:ccce24b7ad89adb5a1e34a6ba96ac2530046763912806ad4c247356a8f33a67b"},
+    {file = "pillow-10.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:5f77cf66e96ae734717d341c145c5949c63180842a545c47a0ce7ae52ca83795"},
+    {file = "pillow-10.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e4b878386c4bf293578b48fc570b84ecfe477d3b77ba39a6e87150af77f40c57"},
+    {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fdcbb4068117dfd9ce0138d068ac512843c52295ed996ae6dd1faf537b6dbc27"},
+    {file = "pillow-10.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9797a6c8fe16f25749b371c02e2ade0efb51155e767a971c61734b1bf6293994"},
+    {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:9e91179a242bbc99be65e139e30690e081fe6cb91a8e77faf4c409653de39451"},
+    {file = "pillow-10.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:1b87bd9d81d179bd8ab871603bd80d8645729939f90b71e62914e816a76fc6bd"},
+    {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:81d09caa7b27ef4e61cb7d8fbf1714f5aec1c6b6c5270ee53504981e6e9121ad"},
+    {file = "pillow-10.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:048ad577748b9fa4a99a0548c64f2cb8d672d5bf2e643a739ac8faff1164238c"},
+    {file = "pillow-10.3.0-cp311-cp311-win32.whl", hash = "sha256:7161ec49ef0800947dc5570f86568a7bb36fa97dd09e9827dc02b718c5643f09"},
+    {file = "pillow-10.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:8eb0908e954d093b02a543dc963984d6e99ad2b5e36503d8a0aaf040505f747d"},
+    {file = "pillow-10.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:4e6f7d1c414191c1199f8996d3f2282b9ebea0945693fb67392c75a3a320941f"},
+    {file = "pillow-10.3.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:e46f38133e5a060d46bd630faa4d9fa0202377495df1f068a8299fd78c84de84"},
+    {file = "pillow-10.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:50b8eae8f7334ec826d6eeffaeeb00e36b5e24aa0b9df322c247539714c6df19"},
+    {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d3bea1c75f8c53ee4d505c3e67d8c158ad4df0d83170605b50b64025917f338"},
+    {file = "pillow-10.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:19aeb96d43902f0a783946a0a87dbdad5c84c936025b8419da0a0cd7724356b1"},
+    {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:74d28c17412d9caa1066f7a31df8403ec23d5268ba46cd0ad2c50fb82ae40462"},
+    {file = "pillow-10.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:ff61bfd9253c3915e6d41c651d5f962da23eda633cf02262990094a18a55371a"},
+    {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:d886f5d353333b4771d21267c7ecc75b710f1a73d72d03ca06df49b09015a9ef"},
+    {file = "pillow-10.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b5ec25d8b17217d635f8935dbc1b9aa5907962fae29dff220f2659487891cd3"},
+    {file = "pillow-10.3.0-cp312-cp312-win32.whl", hash = "sha256:51243f1ed5161b9945011a7360e997729776f6e5d7005ba0c6879267d4c5139d"},
+    {file = "pillow-10.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:412444afb8c4c7a6cc11a47dade32982439925537e483be7c0ae0cf96c4f6a0b"},
+    {file = "pillow-10.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:798232c92e7665fe82ac085f9d8e8ca98826f8e27859d9a96b41d519ecd2e49a"},
+    {file = "pillow-10.3.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:4eaa22f0d22b1a7e93ff0a596d57fdede2e550aecffb5a1ef1106aaece48e96b"},
+    {file = "pillow-10.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cd5e14fbf22a87321b24c88669aad3a51ec052eb145315b3da3b7e3cc105b9a2"},
+    {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1530e8f3a4b965eb6a7785cf17a426c779333eb62c9a7d1bbcf3ffd5bf77a4aa"},
+    {file = "pillow-10.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d512aafa1d32efa014fa041d38868fda85028e3f930a96f85d49c7d8ddc0383"},
+    {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:339894035d0ede518b16073bdc2feef4c991ee991a29774b33e515f1d308e08d"},
+    {file = "pillow-10.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:aa7e402ce11f0885305bfb6afb3434b3cd8f53b563ac065452d9d5654c7b86fd"},
+    {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:0ea2a783a2bdf2a561808fe4a7a12e9aa3799b701ba305de596bc48b8bdfce9d"},
+    {file = "pillow-10.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:c78e1b00a87ce43bb37642c0812315b411e856a905d58d597750eb79802aaaa3"},
+    {file = "pillow-10.3.0-cp38-cp38-win32.whl", hash = "sha256:72d622d262e463dfb7595202d229f5f3ab4b852289a1cd09650362db23b9eb0b"},
+    {file = "pillow-10.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:2034f6759a722da3a3dbd91a81148cf884e91d1b747992ca288ab88c1de15999"},
+    {file = "pillow-10.3.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2ed854e716a89b1afcedea551cd85f2eb2a807613752ab997b9974aaa0d56936"},
+    {file = "pillow-10.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dc1a390a82755a8c26c9964d457d4c9cbec5405896cba94cf51f36ea0d855002"},
+    {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4203efca580f0dd6f882ca211f923168548f7ba334c189e9eab1178ab840bf60"},
+    {file = "pillow-10.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3102045a10945173d38336f6e71a8dc71bcaeed55c3123ad4af82c52807b9375"},
+    {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:6fb1b30043271ec92dc65f6d9f0b7a830c210b8a96423074b15c7bc999975f57"},
+    {file = "pillow-10.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1dfc94946bc60ea375cc39cff0b8da6c7e5f8fcdc1d946beb8da5c216156ddd8"},
+    {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b09b86b27a064c9624d0a6c54da01c1beaf5b6cadfa609cf63789b1d08a797b9"},
+    {file = "pillow-10.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d3b2348a78bc939b4fed6552abfd2e7988e0f81443ef3911a4b8498ca084f6eb"},
+    {file = "pillow-10.3.0-cp39-cp39-win32.whl", hash = "sha256:45ebc7b45406febf07fef35d856f0293a92e7417ae7933207e90bf9090b70572"},
+    {file = "pillow-10.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:0ba26351b137ca4e0db0342d5d00d2e355eb29372c05afd544ebf47c0956ffeb"},
+    {file = "pillow-10.3.0-cp39-cp39-win_arm64.whl", hash = "sha256:50fd3f6b26e3441ae07b7c979309638b72abc1a25da31a81a7fbd9495713ef4f"},
+    {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_10_10_x86_64.whl", hash = "sha256:6b02471b72526ab8a18c39cb7967b72d194ec53c1fd0a70b050565a0f366d355"},
+    {file = "pillow-10.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8ab74c06ffdab957d7670c2a5a6e1a70181cd10b727cd788c4dd9005b6a8acd9"},
+    {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:048eeade4c33fdf7e08da40ef402e748df113fd0b4584e32c4af74fe78baaeb2"},
+    {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2ec1e921fd07c7cda7962bad283acc2f2a9ccc1b971ee4b216b75fad6f0463"},
+    {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c8e73e99da7db1b4cad7f8d682cf6abad7844da39834c288fbfa394a47bbced"},
+    {file = "pillow-10.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:16563993329b79513f59142a6b02055e10514c1a8e86dca8b48a893e33cf91e3"},
+    {file = "pillow-10.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:dd78700f5788ae180b5ee8902c6aea5a5726bac7c364b202b4b3e3ba2d293170"},
+    {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_10_10_x86_64.whl", hash = "sha256:aff76a55a8aa8364d25400a210a65ff59d0168e0b4285ba6bf2bd83cf675ba32"},
+    {file = "pillow-10.3.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b7bc2176354defba3edc2b9a777744462da2f8e921fbaf61e52acb95bafa9828"},
+    {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:793b4e24db2e8742ca6423d3fde8396db336698c55cd34b660663ee9e45ed37f"},
+    {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d93480005693d247f8346bc8ee28c72a2191bdf1f6b5db469c096c0c867ac015"},
+    {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c83341b89884e2b2e55886e8fbbf37c3fa5efd6c8907124aeb72f285ae5696e5"},
+    {file = "pillow-10.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1a1d1915db1a4fdb2754b9de292642a39a7fb28f1736699527bb649484fb966a"},
+    {file = "pillow-10.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a0eaa93d054751ee9964afa21c06247779b90440ca41d184aeb5d410f20ff591"},
+    {file = "pillow-10.3.0.tar.gz", hash = "sha256:9d2455fbf44c914840c793e89aa82d0e1763a14253a000743719ae5946814b2d"},
 ]
 
 [package.extras]
 docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinx-removed-in", "sphinxext-opengraph"]
+fpx = ["olefile"]
+mic = ["olefile"]
 tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"]
+typing = ["typing-extensions"]
+xmp = ["defusedxml"]
 
 [[package]]
 name = "pluggy"
-version = "1.3.0"
+version = "1.5.0"
 description = "plugin and hook calling mechanisms for python"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pluggy-1.3.0-py3-none-any.whl", hash = "sha256:d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7"},
-    {file = "pluggy-1.3.0.tar.gz", hash = "sha256:cf61ae8f126ac6f7c451172cf30e3e43d3ca77615509771b3a984a0730651e12"},
+    {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"},
+    {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"},
 ]
 
 [package.extras]
@@ -1762,27 +2108,27 @@ files = [
 
 [[package]]
 name = "psutil"
-version = "5.9.6"
+version = "5.9.8"
 description = "Cross-platform lib for process and system monitoring in Python."
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
 files = [
-    {file = "psutil-5.9.6-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:fb8a697f11b0f5994550555fcfe3e69799e5b060c8ecf9e2f75c69302cc35c0d"},
-    {file = "psutil-5.9.6-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:91ecd2d9c00db9817a4b4192107cf6954addb5d9d67a969a4f436dbc9200f88c"},
-    {file = "psutil-5.9.6-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:10e8c17b4f898d64b121149afb136c53ea8b68c7531155147867b7b1ac9e7e28"},
-    {file = "psutil-5.9.6-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:18cd22c5db486f33998f37e2bb054cc62fd06646995285e02a51b1e08da97017"},
-    {file = "psutil-5.9.6-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:ca2780f5e038379e520281e4c032dddd086906ddff9ef0d1b9dcf00710e5071c"},
-    {file = "psutil-5.9.6-cp27-none-win32.whl", hash = "sha256:70cb3beb98bc3fd5ac9ac617a327af7e7f826373ee64c80efd4eb2856e5051e9"},
-    {file = "psutil-5.9.6-cp27-none-win_amd64.whl", hash = "sha256:51dc3d54607c73148f63732c727856f5febec1c7c336f8f41fcbd6315cce76ac"},
-    {file = "psutil-5.9.6-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:c69596f9fc2f8acd574a12d5f8b7b1ba3765a641ea5d60fb4736bf3c08a8214a"},
-    {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92e0cc43c524834af53e9d3369245e6cc3b130e78e26100d1f63cdb0abeb3d3c"},
-    {file = "psutil-5.9.6-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:748c9dd2583ed86347ed65d0035f45fa8c851e8d90354c122ab72319b5f366f4"},
-    {file = "psutil-5.9.6-cp36-cp36m-win32.whl", hash = "sha256:3ebf2158c16cc69db777e3c7decb3c0f43a7af94a60d72e87b2823aebac3d602"},
-    {file = "psutil-5.9.6-cp36-cp36m-win_amd64.whl", hash = "sha256:ff18b8d1a784b810df0b0fff3bcb50ab941c3b8e2c8de5726f9c71c601c611aa"},
-    {file = "psutil-5.9.6-cp37-abi3-win32.whl", hash = "sha256:a6f01f03bf1843280f4ad16f4bde26b817847b4c1a0db59bf6419807bc5ce05c"},
-    {file = "psutil-5.9.6-cp37-abi3-win_amd64.whl", hash = "sha256:6e5fb8dc711a514da83098bc5234264e551ad980cec5f85dabf4d38ed6f15e9a"},
-    {file = "psutil-5.9.6-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:daecbcbd29b289aac14ece28eca6a3e60aa361754cf6da3dfb20d4d32b6c7f57"},
-    {file = "psutil-5.9.6.tar.gz", hash = "sha256:e4b92ddcd7dd4cdd3f900180ea1e104932c7bce234fb88976e2a3b296441225a"},
+    {file = "psutil-5.9.8-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:26bd09967ae00920df88e0352a91cff1a78f8d69b3ecabbfe733610c0af486c8"},
+    {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:05806de88103b25903dff19bb6692bd2e714ccf9e668d050d144012055cbca73"},
+    {file = "psutil-5.9.8-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:611052c4bc70432ec770d5d54f64206aa7203a101ec273a0cd82418c86503bb7"},
+    {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:50187900d73c1381ba1454cf40308c2bf6f34268518b3f36a9b663ca87e65e36"},
+    {file = "psutil-5.9.8-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:02615ed8c5ea222323408ceba16c60e99c3f91639b07da6373fb7e6539abc56d"},
+    {file = "psutil-5.9.8-cp27-none-win32.whl", hash = "sha256:36f435891adb138ed3c9e58c6af3e2e6ca9ac2f365efe1f9cfef2794e6c93b4e"},
+    {file = "psutil-5.9.8-cp27-none-win_amd64.whl", hash = "sha256:bd1184ceb3f87651a67b2708d4c3338e9b10c5df903f2e3776b62303b26cb631"},
+    {file = "psutil-5.9.8-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:aee678c8720623dc456fa20659af736241f575d79429a0e5e9cf88ae0605cc81"},
+    {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cb6403ce6d8e047495a701dc7c5bd788add903f8986d523e3e20b98b733e421"},
+    {file = "psutil-5.9.8-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d06016f7f8625a1825ba3732081d77c94589dca78b7a3fc072194851e88461a4"},
+    {file = "psutil-5.9.8-cp36-cp36m-win32.whl", hash = "sha256:7d79560ad97af658a0f6adfef8b834b53f64746d45b403f225b85c5c2c140eee"},
+    {file = "psutil-5.9.8-cp36-cp36m-win_amd64.whl", hash = "sha256:27cc40c3493bb10de1be4b3f07cae4c010ce715290a5be22b98493509c6299e2"},
+    {file = "psutil-5.9.8-cp37-abi3-win32.whl", hash = "sha256:bc56c2a1b0d15aa3eaa5a60c9f3f8e3e565303b465dbf57a1b730e7a2b9844e0"},
+    {file = "psutil-5.9.8-cp37-abi3-win_amd64.whl", hash = "sha256:8db4c1b57507eef143a15a6884ca10f7c73876cdf5d51e713151c1236a0e68cf"},
+    {file = "psutil-5.9.8-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d16bbddf0693323b8c6123dd804100241da461e41d6e332fb0ba6058f630f8c8"},
+    {file = "psutil-5.9.8.tar.gz", hash = "sha256:6be126e3225486dff286a8fb9a06246a5253f4c7c53b475ea5f5ac934e64194c"},
 ]
 
 [package.extras]
@@ -1790,47 +2136,47 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"]
 
 [[package]]
 name = "pyarrow"
-version = "14.0.1"
+version = "16.0.0"
 description = "Python library for Apache Arrow"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pyarrow-14.0.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:96d64e5ba7dceb519a955e5eeb5c9adcfd63f73a56aea4722e2cc81364fc567a"},
-    {file = "pyarrow-14.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1a8ae88c0038d1bc362a682320112ee6774f006134cd5afc291591ee4bc06505"},
-    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f6f053cb66dc24091f5511e5920e45c83107f954a21032feadc7b9e3a8e7851"},
-    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:906b0dc25f2be12e95975722f1e60e162437023f490dbd80d0deb7375baf3171"},
-    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:78d4a77a46a7de9388b653af1c4ce539350726cd9af62e0831e4f2bd0c95a2f4"},
-    {file = "pyarrow-14.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:06ca79080ef89d6529bb8e5074d4b4f6086143b2520494fcb7cf8a99079cde93"},
-    {file = "pyarrow-14.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:32542164d905002c42dff896efdac79b3bdd7291b1b74aa292fac8450d0e4dcd"},
-    {file = "pyarrow-14.0.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:c7331b4ed3401b7ee56f22c980608cf273f0380f77d0f73dd3c185f78f5a6220"},
-    {file = "pyarrow-14.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:922e8b49b88da8633d6cac0e1b5a690311b6758d6f5d7c2be71acb0f1e14cd61"},
-    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58c889851ca33f992ea916b48b8540735055201b177cb0dcf0596a495a667b00"},
-    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30d8494870d9916bb53b2a4384948491444741cb9a38253c590e21f836b01222"},
-    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:be28e1a07f20391bb0b15ea03dcac3aade29fc773c5eb4bee2838e9b2cdde0cb"},
-    {file = "pyarrow-14.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:981670b4ce0110d8dcb3246410a4aabf5714db5d8ea63b15686bce1c914b1f83"},
-    {file = "pyarrow-14.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:4756a2b373a28f6166c42711240643fb8bd6322467e9aacabd26b488fa41ec23"},
-    {file = "pyarrow-14.0.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:cf87e2cec65dd5cf1aa4aba918d523ef56ef95597b545bbaad01e6433851aa10"},
-    {file = "pyarrow-14.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:470ae0194fbfdfbf4a6b65b4f9e0f6e1fa0ea5b90c1ee6b65b38aecee53508c8"},
-    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6263cffd0c3721c1e348062997babdf0151301f7353010c9c9a8ed47448f82ab"},
-    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8089d7e77d1455d529dbd7cff08898bbb2666ee48bc4085203af1d826a33cc"},
-    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fada8396bc739d958d0b81d291cfd201126ed5e7913cb73de6bc606befc30226"},
-    {file = "pyarrow-14.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2a145dab9ed7849fc1101bf03bcdc69913547f10513fdf70fc3ab6c0a50c7eee"},
-    {file = "pyarrow-14.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:05fe7994745b634c5fb16ce5717e39a1ac1fac3e2b0795232841660aa76647cd"},
-    {file = "pyarrow-14.0.1-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:a8eeef015ae69d104c4c3117a6011e7e3ecd1abec79dc87fd2fac6e442f666ee"},
-    {file = "pyarrow-14.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3c76807540989fe8fcd02285dd15e4f2a3da0b09d27781abec3adc265ddbeba1"},
-    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:450e4605e3c20e558485f9161a79280a61c55efe585d51513c014de9ae8d393f"},
-    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:323cbe60210173ffd7db78bfd50b80bdd792c4c9daca8843ef3cd70b186649db"},
-    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0140c7e2b740e08c5a459439d87acd26b747fc408bde0a8806096ee0baaa0c15"},
-    {file = "pyarrow-14.0.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:e592e482edd9f1ab32f18cd6a716c45b2c0f2403dc2af782f4e9674952e6dd27"},
-    {file = "pyarrow-14.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:d264ad13605b61959f2ae7c1d25b1a5b8505b112715c961418c8396433f213ad"},
-    {file = "pyarrow-14.0.1-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:01e44de9749cddc486169cb632f3c99962318e9dacac7778315a110f4bf8a450"},
-    {file = "pyarrow-14.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0351fecf0e26e152542bc164c22ea2a8e8c682726fce160ce4d459ea802d69c"},
-    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33c1f6110c386464fd2e5e4ea3624466055bbe681ff185fd6c9daa98f30a3f9a"},
-    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11e045dfa09855b6d3e7705a37c42e2dc2c71d608fab34d3c23df2e02df9aec3"},
-    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:097828b55321897db0e1dbfc606e3ff8101ae5725673498cbfa7754ee0da80e4"},
-    {file = "pyarrow-14.0.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:1daab52050a1c48506c029e6fa0944a7b2436334d7e44221c16f6f1b2cc9c510"},
-    {file = "pyarrow-14.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:3f6d5faf4f1b0d5a7f97be987cf9e9f8cd39902611e818fe134588ee99bf0283"},
-    {file = "pyarrow-14.0.1.tar.gz", hash = "sha256:b8b3f4fe8d4ec15e1ef9b599b94683c5216adaed78d5cb4c606180546d1e2ee1"},
+    {file = "pyarrow-16.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:22a1fdb1254e5095d629e29cd1ea98ed04b4bbfd8e42cc670a6b639ccc208b60"},
+    {file = "pyarrow-16.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:574a00260a4ed9d118a14770edbd440b848fcae5a3024128be9d0274dbcaf858"},
+    {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0815d0ddb733b8c1b53a05827a91f1b8bde6240f3b20bf9ba5d650eb9b89cdf"},
+    {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df0080339387b5d30de31e0a149c0c11a827a10c82f0c67d9afae3981d1aabb7"},
+    {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:edf38cce0bf0dcf726e074159c60516447e4474904c0033f018c1f33d7dac6c5"},
+    {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:91d28f9a40f1264eab2af7905a4d95320ac2f287891e9c8b0035f264fe3c3a4b"},
+    {file = "pyarrow-16.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:99af421ee451a78884d7faea23816c429e263bd3618b22d38e7992c9ce2a7ad9"},
+    {file = "pyarrow-16.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:d22d0941e6c7bafddf5f4c0662e46f2075850f1c044bf1a03150dd9e189427ce"},
+    {file = "pyarrow-16.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:266ddb7e823f03733c15adc8b5078db2df6980f9aa93d6bb57ece615df4e0ba7"},
+    {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cc23090224b6594f5a92d26ad47465af47c1d9c079dd4a0061ae39551889efe"},
+    {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56850a0afe9ef37249d5387355449c0f94d12ff7994af88f16803a26d38f2016"},
+    {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:705db70d3e2293c2f6f8e84874b5b775f690465798f66e94bb2c07bab0a6bb55"},
+    {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:5448564754c154997bc09e95a44b81b9e31ae918a86c0fcb35c4aa4922756f55"},
+    {file = "pyarrow-16.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:729f7b262aa620c9df8b9967db96c1575e4cfc8c25d078a06968e527b8d6ec05"},
+    {file = "pyarrow-16.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:fb8065dbc0d051bf2ae2453af0484d99a43135cadabacf0af588a3be81fbbb9b"},
+    {file = "pyarrow-16.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:20ce707d9aa390593ea93218b19d0eadab56390311cb87aad32c9a869b0e958c"},
+    {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5823275c8addbbb50cd4e6a6839952682a33255b447277e37a6f518d6972f4e1"},
+    {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ab8b9050752b16a8b53fcd9853bf07d8daf19093533e990085168f40c64d978"},
+    {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:42e56557bc7c5c10d3e42c3b32f6cff649a29d637e8f4e8b311d334cc4326730"},
+    {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2a7abdee4a4a7cfa239e2e8d721224c4b34ffe69a0ca7981354fe03c1328789b"},
+    {file = "pyarrow-16.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:ef2f309b68396bcc5a354106741d333494d6a0d3e1951271849787109f0229a6"},
+    {file = "pyarrow-16.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:ed66e5217b4526fa3585b5e39b0b82f501b88a10d36bd0d2a4d8aa7b5a48e2df"},
+    {file = "pyarrow-16.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cc8814310486f2a73c661ba8354540f17eef51e1b6dd090b93e3419d3a097b3a"},
+    {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c2f5e239db7ed43e0ad2baf46a6465f89c824cc703f38ef0fde927d8e0955f7"},
+    {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f293e92d1db251447cb028ae12f7bc47526e4649c3a9924c8376cab4ad6b98bd"},
+    {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:dd9334a07b6dc21afe0857aa31842365a62eca664e415a3f9536e3a8bb832c07"},
+    {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d91073d1e2fef2c121154680e2ba7e35ecf8d4969cc0af1fa6f14a8675858159"},
+    {file = "pyarrow-16.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:71d52561cd7aefd22cf52538f262850b0cc9e4ec50af2aaa601da3a16ef48877"},
+    {file = "pyarrow-16.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b93c9a50b965ee0bf4fef65e53b758a7e8dcc0c2d86cebcc037aaaf1b306ecc0"},
+    {file = "pyarrow-16.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d831690844706e374c455fba2fb8cfcb7b797bfe53ceda4b54334316e1ac4fa4"},
+    {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35692ce8ad0b8c666aa60f83950957096d92f2a9d8d7deda93fb835e6053307e"},
+    {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dd3151d098e56f16a8389c1247137f9e4c22720b01c6f3aa6dec29a99b74d80"},
+    {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:bd40467bdb3cbaf2044ed7a6f7f251c8f941c8b31275aaaf88e746c4f3ca4a7a"},
+    {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:00a1dcb22ad4ceb8af87f7bd30cc3354788776c417f493089e0a0af981bc8d80"},
+    {file = "pyarrow-16.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:fda9a7cebd1b1d46c97b511f60f73a5b766a6de4c5236f144f41a5d5afec1f35"},
+    {file = "pyarrow-16.0.0.tar.gz", hash = "sha256:59bb1f1edbbf4114c72415f039f1359f1a57d166a331c3229788ccbfbb31689a"},
 ]
 
 [package.dependencies]
@@ -1847,6 +2193,116 @@ files = [
     {file = "pyarrow_hotfix-0.6.tar.gz", hash = "sha256:79d3e030f7ff890d408a100ac16d6f00b14d44a502d7897cd9fc3e3a534e9945"},
 ]
 
+[[package]]
+name = "pydantic"
+version = "2.7.1"
+description = "Data validation using Python type hints"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "pydantic-2.7.1-py3-none-any.whl", hash = "sha256:e029badca45266732a9a79898a15ae2e8b14840b1eabbb25844be28f0b33f3d5"},
+    {file = "pydantic-2.7.1.tar.gz", hash = "sha256:e9dbb5eada8abe4d9ae5f46b9939aead650cd2b68f249bb3a8139dbe125803cc"},
+]
+
+[package.dependencies]
+annotated-types = ">=0.4.0"
+pydantic-core = "2.18.2"
+typing-extensions = ">=4.6.1"
+
+[package.extras]
+email = ["email-validator (>=2.0.0)"]
+
+[[package]]
+name = "pydantic-core"
+version = "2.18.2"
+description = "Core functionality for Pydantic validation and serialization"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "pydantic_core-2.18.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9e08e867b306f525802df7cd16c44ff5ebbe747ff0ca6cf3fde7f36c05a59a81"},
+    {file = "pydantic_core-2.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f0a21cbaa69900cbe1a2e7cad2aa74ac3cf21b10c3efb0fa0b80305274c0e8a2"},
+    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0680b1f1f11fda801397de52c36ce38ef1c1dc841a0927a94f226dea29c3ae3d"},
+    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:95b9d5e72481d3780ba3442eac863eae92ae43a5f3adb5b4d0a1de89d42bb250"},
+    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fcf5cd9c4b655ad666ca332b9a081112cd7a58a8b5a6ca7a3104bc950f2038"},
+    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b5155ff768083cb1d62f3e143b49a8a3432e6789a3abee8acd005c3c7af1c74"},
+    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:553ef617b6836fc7e4df130bb851e32fe357ce36336d897fd6646d6058d980af"},
+    {file = "pydantic_core-2.18.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b89ed9eb7d616ef5714e5590e6cf7f23b02d0d539767d33561e3675d6f9e3857"},
+    {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:75f7e9488238e920ab6204399ded280dc4c307d034f3924cd7f90a38b1829563"},
+    {file = "pydantic_core-2.18.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ef26c9e94a8c04a1b2924149a9cb081836913818e55681722d7f29af88fe7b38"},
+    {file = "pydantic_core-2.18.2-cp310-none-win32.whl", hash = "sha256:182245ff6b0039e82b6bb585ed55a64d7c81c560715d1bad0cbad6dfa07b4027"},
+    {file = "pydantic_core-2.18.2-cp310-none-win_amd64.whl", hash = "sha256:e23ec367a948b6d812301afc1b13f8094ab7b2c280af66ef450efc357d2ae543"},
+    {file = "pydantic_core-2.18.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:219da3f096d50a157f33645a1cf31c0ad1fe829a92181dd1311022f986e5fbe3"},
+    {file = "pydantic_core-2.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc1cfd88a64e012b74e94cd00bbe0f9c6df57049c97f02bb07d39e9c852e19a4"},
+    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b7133a6e6aeb8df37d6f413f7705a37ab4031597f64ab56384c94d98fa0e90"},
+    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:224c421235f6102e8737032483f43c1a8cfb1d2f45740c44166219599358c2cd"},
+    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b14d82cdb934e99dda6d9d60dc84a24379820176cc4a0d123f88df319ae9c150"},
+    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2728b01246a3bba6de144f9e3115b532ee44bd6cf39795194fb75491824a1413"},
+    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:470b94480bb5ee929f5acba6995251ada5e059a5ef3e0dfc63cca287283ebfa6"},
+    {file = "pydantic_core-2.18.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:997abc4df705d1295a42f95b4eec4950a37ad8ae46d913caeee117b6b198811c"},
+    {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:75250dbc5290e3f1a0f4618db35e51a165186f9034eff158f3d490b3fed9f8a0"},
+    {file = "pydantic_core-2.18.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4456f2dca97c425231d7315737d45239b2b51a50dc2b6f0c2bb181fce6207664"},
+    {file = "pydantic_core-2.18.2-cp311-none-win32.whl", hash = "sha256:269322dcc3d8bdb69f054681edff86276b2ff972447863cf34c8b860f5188e2e"},
+    {file = "pydantic_core-2.18.2-cp311-none-win_amd64.whl", hash = "sha256:800d60565aec896f25bc3cfa56d2277d52d5182af08162f7954f938c06dc4ee3"},
+    {file = "pydantic_core-2.18.2-cp311-none-win_arm64.whl", hash = "sha256:1404c69d6a676245199767ba4f633cce5f4ad4181f9d0ccb0577e1f66cf4c46d"},
+    {file = "pydantic_core-2.18.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:fb2bd7be70c0fe4dfd32c951bc813d9fe6ebcbfdd15a07527796c8204bd36242"},
+    {file = "pydantic_core-2.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6132dd3bd52838acddca05a72aafb6eab6536aa145e923bb50f45e78b7251043"},
+    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d904828195733c183d20a54230c0df0eb46ec746ea1a666730787353e87182"},
+    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c9bd70772c720142be1020eac55f8143a34ec9f82d75a8e7a07852023e46617f"},
+    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2b8ed04b3582771764538f7ee7001b02e1170223cf9b75dff0bc698fadb00cf3"},
+    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6dac87ddb34aaec85f873d737e9d06a3555a1cc1a8e0c44b7f8d5daeb89d86f"},
+    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca4ae5a27ad7a4ee5170aebce1574b375de390bc01284f87b18d43a3984df72"},
+    {file = "pydantic_core-2.18.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:886eec03591b7cf058467a70a87733b35f44707bd86cf64a615584fd72488b7c"},
+    {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:ca7b0c1f1c983e064caa85f3792dd2fe3526b3505378874afa84baf662e12241"},
+    {file = "pydantic_core-2.18.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4b4356d3538c3649337df4074e81b85f0616b79731fe22dd11b99499b2ebbdf3"},
+    {file = "pydantic_core-2.18.2-cp312-none-win32.whl", hash = "sha256:8b172601454f2d7701121bbec3425dd71efcb787a027edf49724c9cefc14c038"},
+    {file = "pydantic_core-2.18.2-cp312-none-win_amd64.whl", hash = "sha256:b1bd7e47b1558ea872bd16c8502c414f9e90dcf12f1395129d7bb42a09a95438"},
+    {file = "pydantic_core-2.18.2-cp312-none-win_arm64.whl", hash = "sha256:98758d627ff397e752bc339272c14c98199c613f922d4a384ddc07526c86a2ec"},
+    {file = "pydantic_core-2.18.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9fdad8e35f278b2c3eb77cbdc5c0a49dada440657bf738d6905ce106dc1de439"},
+    {file = "pydantic_core-2.18.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1d90c3265ae107f91a4f279f4d6f6f1d4907ac76c6868b27dc7fb33688cfb347"},
+    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:390193c770399861d8df9670fb0d1874f330c79caaca4642332df7c682bf6b91"},
+    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:82d5d4d78e4448683cb467897fe24e2b74bb7b973a541ea1dcfec1d3cbce39fb"},
+    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4774f3184d2ef3e14e8693194f661dea5a4d6ca4e3dc8e39786d33a94865cefd"},
+    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4d938ec0adf5167cb335acb25a4ee69a8107e4984f8fbd2e897021d9e4ca21b"},
+    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e0e8b1be28239fc64a88a8189d1df7fad8be8c1ae47fcc33e43d4be15f99cc70"},
+    {file = "pydantic_core-2.18.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:868649da93e5a3d5eacc2b5b3b9235c98ccdbfd443832f31e075f54419e1b96b"},
+    {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:78363590ef93d5d226ba21a90a03ea89a20738ee5b7da83d771d283fd8a56761"},
+    {file = "pydantic_core-2.18.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:852e966fbd035a6468fc0a3496589b45e2208ec7ca95c26470a54daed82a0788"},
+    {file = "pydantic_core-2.18.2-cp38-none-win32.whl", hash = "sha256:6a46e22a707e7ad4484ac9ee9f290f9d501df45954184e23fc29408dfad61350"},
+    {file = "pydantic_core-2.18.2-cp38-none-win_amd64.whl", hash = "sha256:d91cb5ea8b11607cc757675051f61b3d93f15eca3cefb3e6c704a5d6e8440f4e"},
+    {file = "pydantic_core-2.18.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ae0a8a797a5e56c053610fa7be147993fe50960fa43609ff2a9552b0e07013e8"},
+    {file = "pydantic_core-2.18.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:042473b6280246b1dbf530559246f6842b56119c2926d1e52b631bdc46075f2a"},
+    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a388a77e629b9ec814c1b1e6b3b595fe521d2cdc625fcca26fbc2d44c816804"},
+    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25add29b8f3b233ae90ccef2d902d0ae0432eb0d45370fe315d1a5cf231004b"},
+    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f459a5ce8434614dfd39bbebf1041952ae01da6bed9855008cb33b875cb024c0"},
+    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eff2de745698eb46eeb51193a9f41d67d834d50e424aef27df2fcdee1b153845"},
+    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8309f67285bdfe65c372ea3722b7a5642680f3dba538566340a9d36e920b5f0"},
+    {file = "pydantic_core-2.18.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f93a8a2e3938ff656a7c1bc57193b1319960ac015b6e87d76c76bf14fe0244b4"},
+    {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:22057013c8c1e272eb8d0eebc796701167d8377441ec894a8fed1af64a0bf399"},
+    {file = "pydantic_core-2.18.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfeecd1ac6cc1fb2692c3d5110781c965aabd4ec5d32799773ca7b1456ac636b"},
+    {file = "pydantic_core-2.18.2-cp39-none-win32.whl", hash = "sha256:0d69b4c2f6bb3e130dba60d34c0845ba31b69babdd3f78f7c0c8fae5021a253e"},
+    {file = "pydantic_core-2.18.2-cp39-none-win_amd64.whl", hash = "sha256:d9319e499827271b09b4e411905b24a426b8fb69464dfa1696258f53a3334641"},
+    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a1874c6dd4113308bd0eb568418e6114b252afe44319ead2b4081e9b9521fe75"},
+    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ccdd111c03bfd3666bd2472b674c6899550e09e9f298954cfc896ab92b5b0e6d"},
+    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e18609ceaa6eed63753037fc06ebb16041d17d28199ae5aba0052c51449650a9"},
+    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e5c584d357c4e2baf0ff7baf44f4994be121e16a2c88918a5817331fc7599d7"},
+    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43f0f463cf89ace478de71a318b1b4f05ebc456a9b9300d027b4b57c1a2064fb"},
+    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e1b395e58b10b73b07b7cf740d728dd4ff9365ac46c18751bf8b3d8cca8f625a"},
+    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0098300eebb1c837271d3d1a2cd2911e7c11b396eac9661655ee524a7f10587b"},
+    {file = "pydantic_core-2.18.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:36789b70d613fbac0a25bb07ab3d9dba4d2e38af609c020cf4d888d165ee0bf3"},
+    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3f9a801e7c8f1ef8718da265bba008fa121243dfe37c1cea17840b0944dfd72c"},
+    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:3a6515ebc6e69d85502b4951d89131ca4e036078ea35533bb76327f8424531ce"},
+    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:20aca1e2298c56ececfd8ed159ae4dde2df0781988c97ef77d5c16ff4bd5b400"},
+    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:223ee893d77a310a0391dca6df00f70bbc2f36a71a895cecd9a0e762dc37b349"},
+    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2334ce8c673ee93a1d6a65bd90327588387ba073c17e61bf19b4fd97d688d63c"},
+    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:cbca948f2d14b09d20268cda7b0367723d79063f26c4ffc523af9042cad95592"},
+    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b3ef08e20ec49e02d5c6717a91bb5af9b20f1805583cb0adfe9ba2c6b505b5ae"},
+    {file = "pydantic_core-2.18.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6fdc8627910eed0c01aed6a390a252fe3ea6d472ee70fdde56273f198938374"},
+    {file = "pydantic_core-2.18.2.tar.gz", hash = "sha256:2e29d20810dfc3043ee13ac7d9e25105799817683348823f305ab3f349b9386e"},
+]
+
+[package.dependencies]
+typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
+
 [[package]]
 name = "pyreadline3"
 version = "3.4.1"
@@ -1860,13 +2316,13 @@ files = [
 
 [[package]]
 name = "pytest"
-version = "7.4.3"
+version = "7.4.4"
 description = "pytest: simple powerful testing with Python"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pytest-7.4.3-py3-none-any.whl", hash = "sha256:0d009c083ea859a71b76adf7c1d502e4bc170b80a8ef002da5806527b9591fac"},
-    {file = "pytest-7.4.3.tar.gz", hash = "sha256:d989d136982de4e3b29dabcc838ad581c64e8ed52c11fbe86ddebd9da0818cd5"},
+    {file = "pytest-7.4.4-py3-none-any.whl", hash = "sha256:b090cdf5ed60bf4c45261be03239c2c1c22df034fbffe691abe93cd80cea01d8"},
+    {file = "pytest-7.4.4.tar.gz", hash = "sha256:2cf0005922c6ace4a3e2ec8b4080eb0d9753fdc93107415332f50ce9e7994280"},
 ]
 
 [package.dependencies]
@@ -1882,13 +2338,13 @@ testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "no
 
 [[package]]
 name = "python-dateutil"
-version = "2.8.2"
+version = "2.9.0.post0"
 description = "Extensions to the standard Python datetime module"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
 files = [
-    {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"},
-    {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"},
+    {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
+    {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
 ]
 
 [package.dependencies]
@@ -1896,13 +2352,13 @@ six = ">=1.5"
 
 [[package]]
 name = "pytz"
-version = "2023.3.post1"
+version = "2024.1"
 description = "World timezone definitions, modern and historical"
 optional = false
 python-versions = "*"
 files = [
-    {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"},
-    {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"},
+    {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"},
+    {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"},
 ]
 
 [[package]]
@@ -1965,101 +2421,121 @@ files = [
     {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"},
 ]
 
+[[package]]
+name = "referencing"
+version = "0.35.0"
+description = "JSON Referencing + Python"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "referencing-0.35.0-py3-none-any.whl", hash = "sha256:8080727b30e364e5783152903672df9b6b091c926a146a759080b62ca3126cd6"},
+    {file = "referencing-0.35.0.tar.gz", hash = "sha256:191e936b0c696d0af17ad7430a3dc68e88bc11be6514f4757dc890f04ab05889"},
+]
+
+[package.dependencies]
+attrs = ">=22.2.0"
+rpds-py = ">=0.7.0"
+
 [[package]]
 name = "regex"
-version = "2023.10.3"
+version = "2024.4.16"
 description = "Alternative regular expression module, to replace re."
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "regex-2023.10.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4c34d4f73ea738223a094d8e0ffd6d2c1a1b4c175da34d6b0de3d8d69bee6bcc"},
-    {file = "regex-2023.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a8f4e49fc3ce020f65411432183e6775f24e02dff617281094ba6ab079ef0915"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4cd1bccf99d3ef1ab6ba835308ad85be040e6a11b0977ef7ea8c8005f01a3c29"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81dce2ddc9f6e8f543d94b05d56e70d03a0774d32f6cca53e978dc01e4fc75b8"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c6b4d23c04831e3ab61717a707a5d763b300213db49ca680edf8bf13ab5d91b"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c15ad0aee158a15e17e0495e1e18741573d04eb6da06d8b84af726cfc1ed02ee"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6239d4e2e0b52c8bd38c51b760cd870069f0bdf99700a62cd509d7a031749a55"},
-    {file = "regex-2023.10.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4a8bf76e3182797c6b1afa5b822d1d5802ff30284abe4599e1247be4fd6b03be"},
-    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:d9c727bbcf0065cbb20f39d2b4f932f8fa1631c3e01fcedc979bd4f51fe051c5"},
-    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:3ccf2716add72f80714b9a63899b67fa711b654be3fcdd34fa391d2d274ce767"},
-    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:107ac60d1bfdc3edb53be75e2a52aff7481b92817cfdddd9b4519ccf0e54a6ff"},
-    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:00ba3c9818e33f1fa974693fb55d24cdc8ebafcb2e4207680669d8f8d7cca79a"},
-    {file = "regex-2023.10.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f0a47efb1dbef13af9c9a54a94a0b814902e547b7f21acb29434504d18f36e3a"},
-    {file = "regex-2023.10.3-cp310-cp310-win32.whl", hash = "sha256:36362386b813fa6c9146da6149a001b7bd063dabc4d49522a1f7aa65b725c7ec"},
-    {file = "regex-2023.10.3-cp310-cp310-win_amd64.whl", hash = "sha256:c65a3b5330b54103e7d21cac3f6bf3900d46f6d50138d73343d9e5b2900b2353"},
-    {file = "regex-2023.10.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90a79bce019c442604662d17bf69df99090e24cdc6ad95b18b6725c2988a490e"},
-    {file = "regex-2023.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c7964c2183c3e6cce3f497e3a9f49d182e969f2dc3aeeadfa18945ff7bdd7051"},
-    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ef80829117a8061f974b2fda8ec799717242353bff55f8a29411794d635d964"},
-    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5addc9d0209a9afca5fc070f93b726bf7003bd63a427f65ef797a931782e7edc"},
-    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c148bec483cc4b421562b4bcedb8e28a3b84fcc8f0aa4418e10898f3c2c0eb9b"},
-    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d1f21af4c1539051049796a0f50aa342f9a27cde57318f2fc41ed50b0dbc4ac"},
-    {file = "regex-2023.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b9ac09853b2a3e0d0082104036579809679e7715671cfbf89d83c1cb2a30f58"},
-    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ebedc192abbc7fd13c5ee800e83a6df252bec691eb2c4bedc9f8b2e2903f5e2a"},
-    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d8a993c0a0ffd5f2d3bda23d0cd75e7086736f8f8268de8a82fbc4bd0ac6791e"},
-    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:be6b7b8d42d3090b6c80793524fa66c57ad7ee3fe9722b258aec6d0672543fd0"},
-    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4023e2efc35a30e66e938de5aef42b520c20e7eda7bb5fb12c35e5d09a4c43f6"},
-    {file = "regex-2023.10.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0d47840dc05e0ba04fe2e26f15126de7c755496d5a8aae4a08bda4dd8d646c54"},
-    {file = "regex-2023.10.3-cp311-cp311-win32.whl", hash = "sha256:9145f092b5d1977ec8c0ab46e7b3381b2fd069957b9862a43bd383e5c01d18c2"},
-    {file = "regex-2023.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:b6104f9a46bd8743e4f738afef69b153c4b8b592d35ae46db07fc28ae3d5fb7c"},
-    {file = "regex-2023.10.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:bff507ae210371d4b1fe316d03433ac099f184d570a1a611e541923f78f05037"},
-    {file = "regex-2023.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be5e22bbb67924dea15039c3282fa4cc6cdfbe0cbbd1c0515f9223186fc2ec5f"},
-    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a992f702c9be9c72fa46f01ca6e18d131906a7180950958f766c2aa294d4b41"},
-    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7434a61b158be563c1362d9071358f8ab91b8d928728cd2882af060481244c9e"},
-    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c2169b2dcabf4e608416f7f9468737583ce5f0a6e8677c4efbf795ce81109d7c"},
-    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9e908ef5889cda4de038892b9accc36d33d72fb3e12c747e2799a0e806ec841"},
-    {file = "regex-2023.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12bd4bc2c632742c7ce20db48e0d99afdc05e03f0b4c1af90542e05b809a03d9"},
-    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bc72c231f5449d86d6c7d9cc7cd819b6eb30134bb770b8cfdc0765e48ef9c420"},
-    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bce8814b076f0ce5766dc87d5a056b0e9437b8e0cd351b9a6c4e1134a7dfbda9"},
-    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:ba7cd6dc4d585ea544c1412019921570ebd8a597fabf475acc4528210d7c4a6f"},
-    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b0c7d2f698e83f15228ba41c135501cfe7d5740181d5903e250e47f617eb4292"},
-    {file = "regex-2023.10.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5a8f91c64f390ecee09ff793319f30a0f32492e99f5dc1c72bc361f23ccd0a9a"},
-    {file = "regex-2023.10.3-cp312-cp312-win32.whl", hash = "sha256:ad08a69728ff3c79866d729b095872afe1e0557251da4abb2c5faff15a91d19a"},
-    {file = "regex-2023.10.3-cp312-cp312-win_amd64.whl", hash = "sha256:39cdf8d141d6d44e8d5a12a8569d5a227f645c87df4f92179bd06e2e2705e76b"},
-    {file = "regex-2023.10.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4a3ee019a9befe84fa3e917a2dd378807e423d013377a884c1970a3c2792d293"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76066d7ff61ba6bf3cb5efe2428fc82aac91802844c022d849a1f0f53820502d"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe50b61bab1b1ec260fa7cd91106fa9fece57e6beba05630afe27c71259c59b"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9fd88f373cb71e6b59b7fa597e47e518282455c2734fd4306a05ca219a1991b0"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3ab05a182c7937fb374f7e946f04fb23a0c0699c0450e9fb02ef567412d2fa3"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dac37cf08fcf2094159922edc7a2784cfcc5c70f8354469f79ed085f0328ebdf"},
-    {file = "regex-2023.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e54ddd0bb8fb626aa1f9ba7b36629564544954fff9669b15da3610c22b9a0991"},
-    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:3367007ad1951fde612bf65b0dffc8fd681a4ab98ac86957d16491400d661302"},
-    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:16f8740eb6dbacc7113e3097b0a36065a02e37b47c936b551805d40340fb9971"},
-    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:f4f2ca6df64cbdd27f27b34f35adb640b5d2d77264228554e68deda54456eb11"},
-    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:39807cbcbe406efca2a233884e169d056c35aa7e9f343d4e78665246a332f597"},
-    {file = "regex-2023.10.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7eece6fbd3eae4a92d7c748ae825cbc1ee41a89bb1c3db05b5578ed3cfcfd7cb"},
-    {file = "regex-2023.10.3-cp37-cp37m-win32.whl", hash = "sha256:ce615c92d90df8373d9e13acddd154152645c0dc060871abf6bd43809673d20a"},
-    {file = "regex-2023.10.3-cp37-cp37m-win_amd64.whl", hash = "sha256:0f649fa32fe734c4abdfd4edbb8381c74abf5f34bc0b3271ce687b23729299ed"},
-    {file = "regex-2023.10.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9b98b7681a9437262947f41c7fac567c7e1f6eddd94b0483596d320092004533"},
-    {file = "regex-2023.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:91dc1d531f80c862441d7b66c4505cd6ea9d312f01fb2f4654f40c6fdf5cc37a"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:82fcc1f1cc3ff1ab8a57ba619b149b907072e750815c5ba63e7aa2e1163384a4"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7979b834ec7a33aafae34a90aad9f914c41fd6eaa8474e66953f3f6f7cbd4368"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef71561f82a89af6cfcbee47f0fabfdb6e63788a9258e913955d89fdd96902ab"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd829712de97753367153ed84f2de752b86cd1f7a88b55a3a775eb52eafe8a94"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00e871d83a45eee2f8688d7e6849609c2ca2a04a6d48fba3dff4deef35d14f07"},
-    {file = "regex-2023.10.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:706e7b739fdd17cb89e1fbf712d9dc21311fc2333f6d435eac2d4ee81985098c"},
-    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cc3f1c053b73f20c7ad88b0d1d23be7e7b3901229ce89f5000a8399746a6e039"},
-    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6f85739e80d13644b981a88f529d79c5bdf646b460ba190bffcaf6d57b2a9863"},
-    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:741ba2f511cc9626b7561a440f87d658aabb3d6b744a86a3c025f866b4d19e7f"},
-    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e77c90ab5997e85901da85131fd36acd0ed2221368199b65f0d11bca44549711"},
-    {file = "regex-2023.10.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:979c24cbefaf2420c4e377ecd1f165ea08cc3d1fbb44bdc51bccbbf7c66a2cb4"},
-    {file = "regex-2023.10.3-cp38-cp38-win32.whl", hash = "sha256:58837f9d221744d4c92d2cf7201c6acd19623b50c643b56992cbd2b745485d3d"},
-    {file = "regex-2023.10.3-cp38-cp38-win_amd64.whl", hash = "sha256:c55853684fe08d4897c37dfc5faeff70607a5f1806c8be148f1695be4a63414b"},
-    {file = "regex-2023.10.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2c54e23836650bdf2c18222c87f6f840d4943944146ca479858404fedeb9f9af"},
-    {file = "regex-2023.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:69c0771ca5653c7d4b65203cbfc5e66db9375f1078689459fe196fe08b7b4930"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ac965a998e1388e6ff2e9781f499ad1eaa41e962a40d11c7823c9952c77123e"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c0e8fae5b27caa34177bdfa5a960c46ff2f78ee2d45c6db15ae3f64ecadde14"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c56c3d47da04f921b73ff9415fbaa939f684d47293f071aa9cbb13c94afc17d"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ef1e014eed78ab650bef9a6a9cbe50b052c0aebe553fb2881e0453717573f52"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d29338556a59423d9ff7b6eb0cb89ead2b0875e08fe522f3e068b955c3e7b59b"},
-    {file = "regex-2023.10.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9c6d0ced3c06d0f183b73d3c5920727268d2201aa0fe6d55c60d68c792ff3588"},
-    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:994645a46c6a740ee8ce8df7911d4aee458d9b1bc5639bc968226763d07f00fa"},
-    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:66e2fe786ef28da2b28e222c89502b2af984858091675044d93cb50e6f46d7af"},
-    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:11175910f62b2b8c055f2b089e0fedd694fe2be3941b3e2633653bc51064c528"},
-    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:06e9abc0e4c9ab4779c74ad99c3fc10d3967d03114449acc2c2762ad4472b8ca"},
-    {file = "regex-2023.10.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:fb02e4257376ae25c6dd95a5aec377f9b18c09be6ebdefa7ad209b9137b73d48"},
-    {file = "regex-2023.10.3-cp39-cp39-win32.whl", hash = "sha256:3b2c3502603fab52d7619b882c25a6850b766ebd1b18de3df23b2f939360e1bd"},
-    {file = "regex-2023.10.3-cp39-cp39-win_amd64.whl", hash = "sha256:adbccd17dcaff65704c856bd29951c58a1bd4b2b0f8ad6b826dbd543fe740988"},
-    {file = "regex-2023.10.3.tar.gz", hash = "sha256:3fef4f844d2290ee0ba57addcec17eec9e3df73f10a2748485dfd6a3a188cc0f"},
+    {file = "regex-2024.4.16-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fb83cc090eac63c006871fd24db5e30a1f282faa46328572661c0a24a2323a08"},
+    {file = "regex-2024.4.16-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8c91e1763696c0eb66340c4df98623c2d4e77d0746b8f8f2bee2c6883fd1fe18"},
+    {file = "regex-2024.4.16-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:10188fe732dec829c7acca7422cdd1bf57d853c7199d5a9e96bb4d40db239c73"},
+    {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:956b58d692f235cfbf5b4f3abd6d99bf102f161ccfe20d2fd0904f51c72c4c66"},
+    {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a70b51f55fd954d1f194271695821dd62054d949efd6368d8be64edd37f55c86"},
+    {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c02fcd2bf45162280613d2e4a1ca3ac558ff921ae4e308ecb307650d3a6ee51"},
+    {file = "regex-2024.4.16-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c4ed75ea6892a56896d78f11006161eea52c45a14994794bcfa1654430984b22"},
+    {file = "regex-2024.4.16-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd727ad276bb91928879f3aa6396c9a1d34e5e180dce40578421a691eeb77f47"},
+    {file = "regex-2024.4.16-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7cbc5d9e8a1781e7be17da67b92580d6ce4dcef5819c1b1b89f49d9678cc278c"},
+    {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:78fddb22b9ef810b63ef341c9fcf6455232d97cfe03938cbc29e2672c436670e"},
+    {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:445ca8d3c5a01309633a0c9db57150312a181146315693273e35d936472df912"},
+    {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:95399831a206211d6bc40224af1c635cb8790ddd5c7493e0bd03b85711076a53"},
+    {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:7731728b6568fc286d86745f27f07266de49603a6fdc4d19c87e8c247be452af"},
+    {file = "regex-2024.4.16-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4facc913e10bdba42ec0aee76d029aedda628161a7ce4116b16680a0413f658a"},
+    {file = "regex-2024.4.16-cp310-cp310-win32.whl", hash = "sha256:911742856ce98d879acbea33fcc03c1d8dc1106234c5e7d068932c945db209c0"},
+    {file = "regex-2024.4.16-cp310-cp310-win_amd64.whl", hash = "sha256:e0a2df336d1135a0b3a67f3bbf78a75f69562c1199ed9935372b82215cddd6e2"},
+    {file = "regex-2024.4.16-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1210365faba7c2150451eb78ec5687871c796b0f1fa701bfd2a4a25420482d26"},
+    {file = "regex-2024.4.16-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9ab40412f8cd6f615bfedea40c8bf0407d41bf83b96f6fc9ff34976d6b7037fd"},
+    {file = "regex-2024.4.16-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fd80d1280d473500d8086d104962a82d77bfbf2b118053824b7be28cd5a79ea5"},
+    {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bb966fdd9217e53abf824f437a5a2d643a38d4fd5fd0ca711b9da683d452969"},
+    {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:20b7a68444f536365af42a75ccecb7ab41a896a04acf58432db9e206f4e525d6"},
+    {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b74586dd0b039c62416034f811d7ee62810174bb70dffcca6439f5236249eb09"},
+    {file = "regex-2024.4.16-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c8290b44d8b0af4e77048646c10c6e3aa583c1ca67f3b5ffb6e06cf0c6f0f89"},
+    {file = "regex-2024.4.16-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2d80a6749724b37853ece57988b39c4e79d2b5fe2869a86e8aeae3bbeef9eb0"},
+    {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3a1018e97aeb24e4f939afcd88211ace472ba566efc5bdf53fd8fd7f41fa7170"},
+    {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8d015604ee6204e76569d2f44e5a210728fa917115bef0d102f4107e622b08d5"},
+    {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:3d5ac5234fb5053850d79dd8eb1015cb0d7d9ed951fa37aa9e6249a19aa4f336"},
+    {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:0a38d151e2cdd66d16dab550c22f9521ba79761423b87c01dae0a6e9add79c0d"},
+    {file = "regex-2024.4.16-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:159dc4e59a159cb8e4e8f8961eb1fa5d58f93cb1acd1701d8aff38d45e1a84a6"},
+    {file = "regex-2024.4.16-cp311-cp311-win32.whl", hash = "sha256:ba2336d6548dee3117520545cfe44dc28a250aa091f8281d28804aa8d707d93d"},
+    {file = "regex-2024.4.16-cp311-cp311-win_amd64.whl", hash = "sha256:8f83b6fd3dc3ba94d2b22717f9c8b8512354fd95221ac661784df2769ea9bba9"},
+    {file = "regex-2024.4.16-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:80b696e8972b81edf0af2a259e1b2a4a661f818fae22e5fa4fa1a995fb4a40fd"},
+    {file = "regex-2024.4.16-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d61ae114d2a2311f61d90c2ef1358518e8f05eafda76eaf9c772a077e0b465ec"},
+    {file = "regex-2024.4.16-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8ba6745440b9a27336443b0c285d705ce73adb9ec90e2f2004c64d95ab5a7598"},
+    {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6295004b2dd37b0835ea5c14a33e00e8cfa3c4add4d587b77287825f3418d310"},
+    {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4aba818dcc7263852aabb172ec27b71d2abca02a593b95fa79351b2774eb1d2b"},
+    {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0800631e565c47520aaa04ae38b96abc5196fe8b4aa9bd864445bd2b5848a7a"},
+    {file = "regex-2024.4.16-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08dea89f859c3df48a440dbdcd7b7155bc675f2fa2ec8c521d02dc69e877db70"},
+    {file = "regex-2024.4.16-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eeaa0b5328b785abc344acc6241cffde50dc394a0644a968add75fcefe15b9d4"},
+    {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:4e819a806420bc010489f4e741b3036071aba209f2e0989d4750b08b12a9343f"},
+    {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:c2d0e7cbb6341e830adcbfa2479fdeebbfbb328f11edd6b5675674e7a1e37730"},
+    {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:91797b98f5e34b6a49f54be33f72e2fb658018ae532be2f79f7c63b4ae225145"},
+    {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:d2da13568eff02b30fd54fccd1e042a70fe920d816616fda4bf54ec705668d81"},
+    {file = "regex-2024.4.16-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:370c68dc5570b394cbaadff50e64d705f64debed30573e5c313c360689b6aadc"},
+    {file = "regex-2024.4.16-cp312-cp312-win32.whl", hash = "sha256:904c883cf10a975b02ab3478bce652f0f5346a2c28d0a8521d97bb23c323cc8b"},
+    {file = "regex-2024.4.16-cp312-cp312-win_amd64.whl", hash = "sha256:785c071c982dce54d44ea0b79cd6dfafddeccdd98cfa5f7b86ef69b381b457d9"},
+    {file = "regex-2024.4.16-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e2f142b45c6fed48166faeb4303b4b58c9fcd827da63f4cf0a123c3480ae11fb"},
+    {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e87ab229332ceb127a165612d839ab87795972102cb9830e5f12b8c9a5c1b508"},
+    {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:81500ed5af2090b4a9157a59dbc89873a25c33db1bb9a8cf123837dcc9765047"},
+    {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b340cccad138ecb363324aa26893963dcabb02bb25e440ebdf42e30963f1a4e0"},
+    {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c72608e70f053643437bd2be0608f7f1c46d4022e4104d76826f0839199347a"},
+    {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a01fe2305e6232ef3e8f40bfc0f0f3a04def9aab514910fa4203bafbc0bb4682"},
+    {file = "regex-2024.4.16-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:03576e3a423d19dda13e55598f0fd507b5d660d42c51b02df4e0d97824fdcae3"},
+    {file = "regex-2024.4.16-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:549c3584993772e25f02d0656ac48abdda73169fe347263948cf2b1cead622f3"},
+    {file = "regex-2024.4.16-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:34422d5a69a60b7e9a07a690094e824b66f5ddc662a5fc600d65b7c174a05f04"},
+    {file = "regex-2024.4.16-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:5f580c651a72b75c39e311343fe6875d6f58cf51c471a97f15a938d9fe4e0d37"},
+    {file = "regex-2024.4.16-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:3399dd8a7495bbb2bacd59b84840eef9057826c664472e86c91d675d007137f5"},
+    {file = "regex-2024.4.16-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8d1f86f3f4e2388aa3310b50694ac44daefbd1681def26b4519bd050a398dc5a"},
+    {file = "regex-2024.4.16-cp37-cp37m-win32.whl", hash = "sha256:dd5acc0a7d38fdc7a3a6fd3ad14c880819008ecb3379626e56b163165162cc46"},
+    {file = "regex-2024.4.16-cp37-cp37m-win_amd64.whl", hash = "sha256:ba8122e3bb94ecda29a8de4cf889f600171424ea586847aa92c334772d200331"},
+    {file = "regex-2024.4.16-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:743deffdf3b3481da32e8a96887e2aa945ec6685af1cfe2bcc292638c9ba2f48"},
+    {file = "regex-2024.4.16-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:7571f19f4a3fd00af9341c7801d1ad1967fc9c3f5e62402683047e7166b9f2b4"},
+    {file = "regex-2024.4.16-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:df79012ebf6f4efb8d307b1328226aef24ca446b3ff8d0e30202d7ebcb977a8c"},
+    {file = "regex-2024.4.16-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e757d475953269fbf4b441207bb7dbdd1c43180711b6208e129b637792ac0b93"},
+    {file = "regex-2024.4.16-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4313ab9bf6a81206c8ac28fdfcddc0435299dc88cad12cc6305fd0e78b81f9e4"},
+    {file = "regex-2024.4.16-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d83c2bc678453646f1a18f8db1e927a2d3f4935031b9ad8a76e56760461105dd"},
+    {file = "regex-2024.4.16-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9df1bfef97db938469ef0a7354b2d591a2d438bc497b2c489471bec0e6baf7c4"},
+    {file = "regex-2024.4.16-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62120ed0de69b3649cc68e2965376048793f466c5a6c4370fb27c16c1beac22d"},
+    {file = "regex-2024.4.16-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c2ef6f7990b6e8758fe48ad08f7e2f66c8f11dc66e24093304b87cae9037bb4a"},
+    {file = "regex-2024.4.16-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8fc6976a3395fe4d1fbeb984adaa8ec652a1e12f36b56ec8c236e5117b585427"},
+    {file = "regex-2024.4.16-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:03e68f44340528111067cecf12721c3df4811c67268b897fbe695c95f860ac42"},
+    {file = "regex-2024.4.16-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ec7e0043b91115f427998febaa2beb82c82df708168b35ece3accb610b91fac1"},
+    {file = "regex-2024.4.16-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c21fc21a4c7480479d12fd8e679b699f744f76bb05f53a1d14182b31f55aac76"},
+    {file = "regex-2024.4.16-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:12f6a3f2f58bb7344751919a1876ee1b976fe08b9ffccb4bbea66f26af6017b9"},
+    {file = "regex-2024.4.16-cp38-cp38-win32.whl", hash = "sha256:479595a4fbe9ed8f8f72c59717e8cf222da2e4c07b6ae5b65411e6302af9708e"},
+    {file = "regex-2024.4.16-cp38-cp38-win_amd64.whl", hash = "sha256:0534b034fba6101611968fae8e856c1698da97ce2efb5c2b895fc8b9e23a5834"},
+    {file = "regex-2024.4.16-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a7ccdd1c4a3472a7533b0a7aa9ee34c9a2bef859ba86deec07aff2ad7e0c3b94"},
+    {file = "regex-2024.4.16-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6f2f017c5be19984fbbf55f8af6caba25e62c71293213f044da3ada7091a4455"},
+    {file = "regex-2024.4.16-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:803b8905b52de78b173d3c1e83df0efb929621e7b7c5766c0843704d5332682f"},
+    {file = "regex-2024.4.16-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:684008ec44ad275832a5a152f6e764bbe1914bea10968017b6feaecdad5736e0"},
+    {file = "regex-2024.4.16-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65436dce9fdc0aeeb0a0effe0839cb3d6a05f45aa45a4d9f9c60989beca78b9c"},
+    {file = "regex-2024.4.16-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea355eb43b11764cf799dda62c658c4d2fdb16af41f59bb1ccfec517b60bcb07"},
+    {file = "regex-2024.4.16-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98c1165f3809ce7774f05cb74e5408cd3aa93ee8573ae959a97a53db3ca3180d"},
+    {file = "regex-2024.4.16-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cccc79a9be9b64c881f18305a7c715ba199e471a3973faeb7ba84172abb3f317"},
+    {file = "regex-2024.4.16-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:00169caa125f35d1bca6045d65a662af0202704489fada95346cfa092ec23f39"},
+    {file = "regex-2024.4.16-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:6cc38067209354e16c5609b66285af17a2863a47585bcf75285cab33d4c3b8df"},
+    {file = "regex-2024.4.16-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:23cff1b267038501b179ccbbd74a821ac4a7192a1852d1d558e562b507d46013"},
+    {file = "regex-2024.4.16-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:b9d320b3bf82a39f248769fc7f188e00f93526cc0fe739cfa197868633d44701"},
+    {file = "regex-2024.4.16-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:89ec7f2c08937421bbbb8b48c54096fa4f88347946d4747021ad85f1b3021b3c"},
+    {file = "regex-2024.4.16-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4918fd5f8b43aa7ec031e0fef1ee02deb80b6afd49c85f0790be1dc4ce34cb50"},
+    {file = "regex-2024.4.16-cp39-cp39-win32.whl", hash = "sha256:684e52023aec43bdf0250e843e1fdd6febbe831bd9d52da72333fa201aaa2335"},
+    {file = "regex-2024.4.16-cp39-cp39-win_amd64.whl", hash = "sha256:e697e1c0238133589e00c244a8b676bc2cfc3ab4961318d902040d099fec7483"},
+    {file = "regex-2024.4.16.tar.gz", hash = "sha256:fa454d26f2e87ad661c4f0c5a5fe4cf6aab1e307d1b94f16ffdfcb089ba685c0"},
 ]
 
 [[package]]
@@ -2083,123 +2559,221 @@ urllib3 = ">=1.21.1,<3"
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
 use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
+[[package]]
+name = "rpds-py"
+version = "0.18.0"
+description = "Python bindings to Rust's persistent data structures (rpds)"
+optional = true
+python-versions = ">=3.8"
+files = [
+    {file = "rpds_py-0.18.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:5b4e7d8d6c9b2e8ee2d55c90b59c707ca59bc30058269b3db7b1f8df5763557e"},
+    {file = "rpds_py-0.18.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c463ed05f9dfb9baebef68048aed8dcdc94411e4bf3d33a39ba97e271624f8f7"},
+    {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01e36a39af54a30f28b73096dd39b6802eddd04c90dbe161c1b8dbe22353189f"},
+    {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d62dec4976954a23d7f91f2f4530852b0c7608116c257833922a896101336c51"},
+    {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd18772815d5f008fa03d2b9a681ae38d5ae9f0e599f7dda233c439fcaa00d40"},
+    {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:923d39efa3cfb7279a0327e337a7958bff00cc447fd07a25cddb0a1cc9a6d2da"},
+    {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39514da80f971362f9267c600b6d459bfbbc549cffc2cef8e47474fddc9b45b1"},
+    {file = "rpds_py-0.18.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a34d557a42aa28bd5c48a023c570219ba2593bcbbb8dc1b98d8cf5d529ab1434"},
+    {file = "rpds_py-0.18.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:93df1de2f7f7239dc9cc5a4a12408ee1598725036bd2dedadc14d94525192fc3"},
+    {file = "rpds_py-0.18.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:34b18ba135c687f4dac449aa5157d36e2cbb7c03cbea4ddbd88604e076aa836e"},
+    {file = "rpds_py-0.18.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c0b5dcf9193625afd8ecc92312d6ed78781c46ecbf39af9ad4681fc9f464af88"},
+    {file = "rpds_py-0.18.0-cp310-none-win32.whl", hash = "sha256:c4325ff0442a12113a6379af66978c3fe562f846763287ef66bdc1d57925d337"},
+    {file = "rpds_py-0.18.0-cp310-none-win_amd64.whl", hash = "sha256:7223a2a5fe0d217e60a60cdae28d6949140dde9c3bcc714063c5b463065e3d66"},
+    {file = "rpds_py-0.18.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:3a96e0c6a41dcdba3a0a581bbf6c44bb863f27c541547fb4b9711fd8cf0ffad4"},
+    {file = "rpds_py-0.18.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30f43887bbae0d49113cbaab729a112251a940e9b274536613097ab8b4899cf6"},
+    {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fcb25daa9219b4cf3a0ab24b0eb9a5cc8949ed4dc72acb8fa16b7e1681aa3c58"},
+    {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d68c93e381010662ab873fea609bf6c0f428b6d0bb00f2c6939782e0818d37bf"},
+    {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b34b7aa8b261c1dbf7720b5d6f01f38243e9b9daf7e6b8bc1fd4657000062f2c"},
+    {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e6d75ab12b0bbab7215e5d40f1e5b738aa539598db27ef83b2ec46747df90e1"},
+    {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b8612cd233543a3781bc659c731b9d607de65890085098986dfd573fc2befe5"},
+    {file = "rpds_py-0.18.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aec493917dd45e3c69d00a8874e7cbed844efd935595ef78a0f25f14312e33c6"},
+    {file = "rpds_py-0.18.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:661d25cbffaf8cc42e971dd570d87cb29a665f49f4abe1f9e76be9a5182c4688"},
+    {file = "rpds_py-0.18.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1df3659d26f539ac74fb3b0c481cdf9d725386e3552c6fa2974f4d33d78e544b"},
+    {file = "rpds_py-0.18.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1ce3ba137ed54f83e56fb983a5859a27d43a40188ba798993812fed73c70836"},
+    {file = "rpds_py-0.18.0-cp311-none-win32.whl", hash = "sha256:69e64831e22a6b377772e7fb337533c365085b31619005802a79242fee620bc1"},
+    {file = "rpds_py-0.18.0-cp311-none-win_amd64.whl", hash = "sha256:998e33ad22dc7ec7e030b3df701c43630b5bc0d8fbc2267653577e3fec279afa"},
+    {file = "rpds_py-0.18.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7f2facbd386dd60cbbf1a794181e6aa0bd429bd78bfdf775436020172e2a23f0"},
+    {file = "rpds_py-0.18.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1d9a5be316c15ffb2b3c405c4ff14448c36b4435be062a7f578ccd8b01f0c4d8"},
+    {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd5bf1af8efe569654bbef5a3e0a56eca45f87cfcffab31dd8dde70da5982475"},
+    {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5417558f6887e9b6b65b4527232553c139b57ec42c64570569b155262ac0754f"},
+    {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:56a737287efecafc16f6d067c2ea0117abadcd078d58721f967952db329a3e5c"},
+    {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8f03bccbd8586e9dd37219bce4d4e0d3ab492e6b3b533e973fa08a112cb2ffc9"},
+    {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4457a94da0d5c53dc4b3e4de1158bdab077db23c53232f37a3cb7afdb053a4e3"},
+    {file = "rpds_py-0.18.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0ab39c1ba9023914297dd88ec3b3b3c3f33671baeb6acf82ad7ce883f6e8e157"},
+    {file = "rpds_py-0.18.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9d54553c1136b50fd12cc17e5b11ad07374c316df307e4cfd6441bea5fb68496"},
+    {file = "rpds_py-0.18.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0af039631b6de0397ab2ba16eaf2872e9f8fca391b44d3d8cac317860a700a3f"},
+    {file = "rpds_py-0.18.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:84ffab12db93b5f6bad84c712c92060a2d321b35c3c9960b43d08d0f639d60d7"},
+    {file = "rpds_py-0.18.0-cp312-none-win32.whl", hash = "sha256:685537e07897f173abcf67258bee3c05c374fa6fff89d4c7e42fb391b0605e98"},
+    {file = "rpds_py-0.18.0-cp312-none-win_amd64.whl", hash = "sha256:e003b002ec72c8d5a3e3da2989c7d6065b47d9eaa70cd8808b5384fbb970f4ec"},
+    {file = "rpds_py-0.18.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:08f9ad53c3f31dfb4baa00da22f1e862900f45908383c062c27628754af2e88e"},
+    {file = "rpds_py-0.18.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c0013fe6b46aa496a6749c77e00a3eb07952832ad6166bd481c74bda0dcb6d58"},
+    {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e32a92116d4f2a80b629778280103d2a510a5b3f6314ceccd6e38006b5e92dcb"},
+    {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e541ec6f2ec456934fd279a3120f856cd0aedd209fc3852eca563f81738f6861"},
+    {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bed88b9a458e354014d662d47e7a5baafd7ff81c780fd91584a10d6ec842cb73"},
+    {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2644e47de560eb7bd55c20fc59f6daa04682655c58d08185a9b95c1970fa1e07"},
+    {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e8916ae4c720529e18afa0b879473049e95949bf97042e938530e072fde061d"},
+    {file = "rpds_py-0.18.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:465a3eb5659338cf2a9243e50ad9b2296fa15061736d6e26240e713522b6235c"},
+    {file = "rpds_py-0.18.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ea7d4a99f3b38c37eac212dbd6ec42b7a5ec51e2c74b5d3223e43c811609e65f"},
+    {file = "rpds_py-0.18.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:67071a6171e92b6da534b8ae326505f7c18022c6f19072a81dcf40db2638767c"},
+    {file = "rpds_py-0.18.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:41ef53e7c58aa4ef281da975f62c258950f54b76ec8e45941e93a3d1d8580594"},
+    {file = "rpds_py-0.18.0-cp38-none-win32.whl", hash = "sha256:fdea4952db2793c4ad0bdccd27c1d8fdd1423a92f04598bc39425bcc2b8ee46e"},
+    {file = "rpds_py-0.18.0-cp38-none-win_amd64.whl", hash = "sha256:7cd863afe7336c62ec78d7d1349a2f34c007a3cc6c2369d667c65aeec412a5b1"},
+    {file = "rpds_py-0.18.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:5307def11a35f5ae4581a0b658b0af8178c65c530e94893345bebf41cc139d33"},
+    {file = "rpds_py-0.18.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f195baa60a54ef9d2de16fbbfd3ff8b04edc0c0140a761b56c267ac11aa467"},
+    {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39f5441553f1c2aed4de4377178ad8ff8f9d733723d6c66d983d75341de265ab"},
+    {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a00312dea9310d4cb7dbd7787e722d2e86a95c2db92fbd7d0155f97127bcb40"},
+    {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8f2fc11e8fe034ee3c34d316d0ad8808f45bc3b9ce5857ff29d513f3ff2923a1"},
+    {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:586f8204935b9ec884500498ccc91aa869fc652c40c093bd9e1471fbcc25c022"},
+    {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddc2f4dfd396c7bfa18e6ce371cba60e4cf9d2e5cdb71376aa2da264605b60b9"},
+    {file = "rpds_py-0.18.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5ddcba87675b6d509139d1b521e0c8250e967e63b5909a7e8f8944d0f90ff36f"},
+    {file = "rpds_py-0.18.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7bd339195d84439cbe5771546fe8a4e8a7a045417d8f9de9a368c434e42a721e"},
+    {file = "rpds_py-0.18.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:d7c36232a90d4755b720fbd76739d8891732b18cf240a9c645d75f00639a9024"},
+    {file = "rpds_py-0.18.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6b0817e34942b2ca527b0e9298373e7cc75f429e8da2055607f4931fded23e20"},
+    {file = "rpds_py-0.18.0-cp39-none-win32.whl", hash = "sha256:99f70b740dc04d09e6b2699b675874367885217a2e9f782bdf5395632ac663b7"},
+    {file = "rpds_py-0.18.0-cp39-none-win_amd64.whl", hash = "sha256:6ef687afab047554a2d366e112dd187b62d261d49eb79b77e386f94644363294"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:ad36cfb355e24f1bd37cac88c112cd7730873f20fb0bdaf8ba59eedf8216079f"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:36b3ee798c58ace201289024b52788161e1ea133e4ac93fba7d49da5fec0ef9e"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8a2f084546cc59ea99fda8e070be2fd140c3092dc11524a71aa8f0f3d5a55ca"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e4461d0f003a0aa9be2bdd1b798a041f177189c1a0f7619fe8c95ad08d9a45d7"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8db715ebe3bb7d86d77ac1826f7d67ec11a70dbd2376b7cc214199360517b641"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:793968759cd0d96cac1e367afd70c235867831983f876a53389ad869b043c948"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66e6a3af5a75363d2c9a48b07cb27c4ea542938b1a2e93b15a503cdfa8490795"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6ef0befbb5d79cf32d0266f5cff01545602344eda89480e1dd88aca964260b18"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:1d4acf42190d449d5e89654d5c1ed3a4f17925eec71f05e2a41414689cda02d1"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:a5f446dd5055667aabaee78487f2b5ab72e244f9bc0b2ffebfeec79051679984"},
+    {file = "rpds_py-0.18.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9dbbeb27f4e70bfd9eec1be5477517365afe05a9b2c441a0b21929ee61048124"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:22806714311a69fd0af9b35b7be97c18a0fc2826e6827dbb3a8c94eac6cf7eeb"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:b34ae4636dfc4e76a438ab826a0d1eed2589ca7d9a1b2d5bb546978ac6485461"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c8370641f1a7f0e0669ddccca22f1da893cef7628396431eb445d46d893e5cd"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c8362467a0fdeccd47935f22c256bec5e6abe543bf0d66e3d3d57a8fb5731863"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:11a8c85ef4a07a7638180bf04fe189d12757c696eb41f310d2426895356dcf05"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b316144e85316da2723f9d8dc75bada12fa58489a527091fa1d5a612643d1a0e"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf1ea2e34868f6fbf070e1af291c8180480310173de0b0c43fc38a02929fc0e3"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e546e768d08ad55b20b11dbb78a745151acbd938f8f00d0cfbabe8b0199b9880"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:4901165d170a5fde6f589acb90a6b33629ad1ec976d4529e769c6f3d885e3e80"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:618a3d6cae6ef8ec88bb76dd80b83cfe415ad4f1d942ca2a903bf6b6ff97a2da"},
+    {file = "rpds_py-0.18.0-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ed4eb745efbff0a8e9587d22a84be94a5eb7d2d99c02dacf7bd0911713ed14dd"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6c81e5f372cd0dc5dc4809553d34f832f60a46034a5f187756d9b90586c2c307"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:43fbac5f22e25bee1d482c97474f930a353542855f05c1161fd804c9dc74a09d"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d7faa6f14017c0b1e69f5e2c357b998731ea75a442ab3841c0dbbbfe902d2c4"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:08231ac30a842bd04daabc4d71fddd7e6d26189406d5a69535638e4dcb88fe76"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:044a3e61a7c2dafacae99d1e722cc2d4c05280790ec5a05031b3876809d89a5c"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3f26b5bd1079acdb0c7a5645e350fe54d16b17bfc5e71f371c449383d3342e17"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:482103aed1dfe2f3b71a58eff35ba105289b8d862551ea576bd15479aba01f66"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1374f4129f9bcca53a1bba0bb86bf78325a0374577cf7e9e4cd046b1e6f20e24"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:635dc434ff724b178cb192c70016cc0ad25a275228f749ee0daf0eddbc8183b1"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:bc362ee4e314870a70f4ae88772d72d877246537d9f8cb8f7eacf10884862432"},
+    {file = "rpds_py-0.18.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:4832d7d380477521a8c1644bbab6588dfedea5e30a7d967b5fb75977c45fd77f"},
+    {file = "rpds_py-0.18.0.tar.gz", hash = "sha256:42821446ee7a76f5d9f71f9e33a4fb2ffd724bb3e7f93386150b61a43115788d"},
+]
+
 [[package]]
 name = "safetensors"
-version = "0.4.2"
+version = "0.4.3"
 description = ""
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "safetensors-0.4.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:69d8bb8384dc2cb5b72c36c4d6980771b293d1a1377b378763f5e37b6bb8d133"},
-    {file = "safetensors-0.4.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3d420e19fcef96d0067f4de4699682b4bbd85fc8fea0bd45fcd961fdf3e8c82c"},
-    {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9ca54742122fa3c4821754adb67318e1cd25c3a22bbf0c5520d5176e77a099ac"},
-    {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b47aa643afdfd66cf7ce4c184092ae734e15d10aba2c2948f24270211801c3c"},
-    {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d88a16bbc330f27e7f2d4caaf6fb061ad0b8a756ecc4033260b0378e128ce8a2"},
-    {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9223b8ac21085db614a510eb3445e7083cae915a9202357555fa939695d4f57"},
-    {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce6cb86133dc8930a7ab5e7438545a7f205f7a1cdd5aaf108c1d0da6bdcfbc2b"},
-    {file = "safetensors-0.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b8a628e0ae2bbc334b62952c384aa5f41621d01850f8d67b04a96b9c39dd7326"},
-    {file = "safetensors-0.4.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:88d6beb7f811a081e0e5f1d9669fdac816c45340c04b1eaf7ebfda0ce93ea403"},
-    {file = "safetensors-0.4.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b57fc5b1b54cb12d8690a58a4cf4b7144730d4bde9d98aa0e1dab6295a1cd579"},
-    {file = "safetensors-0.4.2-cp310-none-win32.whl", hash = "sha256:9d87a1c98803c16cf113b9ba03f07b2dce5e8eabfd1811a7f7323fcaa2a1bf47"},
-    {file = "safetensors-0.4.2-cp310-none-win_amd64.whl", hash = "sha256:18930ec1d1ecb526d3d9835abc2489b8f1530877518f0c541e77ef0b7abcbd99"},
-    {file = "safetensors-0.4.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:c5dd2ed788730ed56b415d1a11c62026b8cc8c573f55a2092afb3ab383e94fff"},
-    {file = "safetensors-0.4.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cc41791b33efb9c83a59b731619f3d15f543dfe71f3a793cb8fbf9bd5d0d5d71"},
-    {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c888bf71d5ca12a720f1ed87d407c4918afa022fb247a6546d8fac15b1f112b"},
-    {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e6b2feb4b47226a16a792e6fac3f49442714884a3d4c1008569d5068a3941be9"},
-    {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f41cc0ee4b838ae8f4d8364a1b162067693d11a3893f0863be8c228d40e4d0ee"},
-    {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:51b7228e46c0a483c40ba4b9470dea00fb1ff8685026bb4766799000f6328ac2"},
-    {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02697f8f2be8ca3c37a4958702dbdb1864447ef765e18b5328a1617022dcf164"},
-    {file = "safetensors-0.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:27fd8f65cf7c80e4280cae1ee6bcd85c483882f6580821abe71ee1a0d3dcfca7"},
-    {file = "safetensors-0.4.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:c487b5f113b0924c9534a07dc034830fb4ef05ce9bb6d78cfe016a7dedfe281f"},
-    {file = "safetensors-0.4.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:da7f6483f3fe67ff39b3a55552552c67930ea10a36e9f2539d36fc205273d767"},
-    {file = "safetensors-0.4.2-cp311-none-win32.whl", hash = "sha256:52a7012f6cb9cb4a132760b6308daede18a9f5f8952ce08adc7c67a7d865c2d8"},
-    {file = "safetensors-0.4.2-cp311-none-win_amd64.whl", hash = "sha256:4d1361a097ac430b310ce9eed8ed4746edee33ddafdfbb965debc8966fc34dc2"},
-    {file = "safetensors-0.4.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:77af8aa0edcc2863760fd6febbfdb82e88fd75d0e60c1ce4ba57208ba5e4a89b"},
-    {file = "safetensors-0.4.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846666c1c5a8c8888d2dfda8d3921cb9cb8e2c5f78365be756c11021e75a0a2a"},
-    {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f4bfc7ea19b446bfad41510d4b4c76101698c00caaa8a332c8edd8090a412ef"},
-    {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:233436fd30f27ffeb3c3780d0b84f496518868445c7a8db003639a649cc98453"},
-    {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a09237a795d11cd11f9dae505d170a29b5616151db1e10c14f892b11caadc7d"},
-    {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de01c9a3a3b7b69627d624ff69d9f11d28ce9908eea2fb6245adafa4b1d43df6"},
-    {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c1f25c5069ee42a5bcffdc66c300a407941edd73f3239e9fdefd26216407391"},
-    {file = "safetensors-0.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7a73b3649456d09ca8506140d44484b63154a7378434cc1e8719f8056550b224"},
-    {file = "safetensors-0.4.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e1625a8d07d046e968bd5c4961810aba1225984e4fb9243626f9d04a06ed3fee"},
-    {file = "safetensors-0.4.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8f74c86b25615cb24ad4cff765a2eefc09d71bf0fed97588cf585aad9c38fbb4"},
-    {file = "safetensors-0.4.2-cp312-none-win32.whl", hash = "sha256:8523b9c5777d771bcde5c2389c03f1cdf7ebe8797432a1bd5e345efe25c55987"},
-    {file = "safetensors-0.4.2-cp312-none-win_amd64.whl", hash = "sha256:dcff0243e1737a21f83d664c63fed89d1f532c23fc6830d0427279fabd789ccb"},
-    {file = "safetensors-0.4.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:96ad3d7d472612e26cbe413922b4fb13933310f0511d346ea5cc9a1e856e52eb"},
-    {file = "safetensors-0.4.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:88250922401b5ae4e37de929178caf46be47ed16c817b2237b81679bec07c120"},
-    {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d40443554142fc0ab30652d5cc8554c4b7a613513bde00373e18afd5de8cbe4b"},
-    {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:27f53f70106224d32d874aacecbeb4a6e4c5b16a1d2006d0e876d97229086d71"},
-    {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cc068afe23734dfb26ce19db0a7877499ddf73b1d55ceb762417e8da4a1b05fb"},
-    {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9be1918eb8d43a11a6f8806759fccfa0eeb0542b12924caba66af8a7800ad01a"},
-    {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41911087d20a7bbd78cb4ad4f98aab0c431533107584df6635d8b54b99945573"},
-    {file = "safetensors-0.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:50771c662aab909f31e94d048e76861fd027d66076ea773eef2e66c717766e24"},
-    {file = "safetensors-0.4.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:13f2e57be007b7ea9329133d2399e6bdfcf1910f655440a4da17df3a45afcd30"},
-    {file = "safetensors-0.4.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c772147e6395bc829842e0a98e1b30c67fe25d816299c28196488511d5a5e951"},
-    {file = "safetensors-0.4.2-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:36239a0060b537a3e8c473df78cffee14c3ec4f51d5f1a853af99371a2fb2a35"},
-    {file = "safetensors-0.4.2-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:d0cbb7664fad2c307f95195f951b7059e95dc23e0e1822e5978c8b500098543c"},
-    {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b3e55adb6bd9dc1c2a341e72f48f075953fa35d173dd8e29a95b3b02d0d1462"},
-    {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42f743b3cca863fba53ca57a193f510e5ec359b97f38c282437716b6768e4a25"},
-    {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04e6af4a6dbeb06c4e6e7d46cf9c716cbc4cc5ef62584fd8a7c0fe558562df45"},
-    {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a492ba21b5c8f14ee5ec9b20f42ba969e53ca1f909a4d04aad736b66a341dcc2"},
-    {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b25b8233a1a85dc67e39838951cfb01595d792f3b7b644add63edb652992e030"},
-    {file = "safetensors-0.4.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fd27e063fbdafe776f7b1714da59110e88f270e86db00788a8fd65f4eacfeba7"},
-    {file = "safetensors-0.4.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1b6fa399f251bbeb52029bf5a0ac2878d7705dd3612a2f8895b48e9c11f0367d"},
-    {file = "safetensors-0.4.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:de642d46b459e4afd5c2020b26c0d6d869a171ea00411897d5776c127cac74f0"},
-    {file = "safetensors-0.4.2-cp37-none-win32.whl", hash = "sha256:77b72d17754c93bb68f3598182f14d78776e0b9b31682ca5bb2c7c5bd9a75267"},
-    {file = "safetensors-0.4.2-cp37-none-win_amd64.whl", hash = "sha256:d36ee3244d461cd655aeef493792c3bccf4875282f8407fd9af99e9a41cf2530"},
-    {file = "safetensors-0.4.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:16b6b3884f7876c6b3b23a742428223a7170a5a9dac819d8c12a1569422c4b5a"},
-    {file = "safetensors-0.4.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ee25d311493fbbe0be9d395faee46e9d79e8948f461e388ff39e59875ed9a350"},
-    {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eed8097968585cd752a1171f86fce9aa1d89a29033e5cd8bec5a502e29f6b7af"},
-    {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:880e6865cf72cb67f9ab8d04a3c4b49dd95ae92fb1583929ce65aed94e1f685f"},
-    {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91290f83daf80ce6d1a7f629b244443c200060a80f908b29d879021409e5ea94"},
-    {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3517d568486ab3508a7acc360b82d7a4a3e26b86efdf210a9ecd9d233c40708a"},
-    {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1f43a77eb38540f782999e5dc5645164fe9027d3f0194f6c9a5126168017efa"},
-    {file = "safetensors-0.4.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b684d9818aa5d63fddc65f7d0151968037d255d91adf74eba82125b41c680aaa"},
-    {file = "safetensors-0.4.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:ab1f5d84185f9fefaf21413efb764e4908057b8a9a0b987ede890c353490fd70"},
-    {file = "safetensors-0.4.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2bd979642e6c3a517ef4b84ff36c2fee4015664fea05a61154fc565978347553"},
-    {file = "safetensors-0.4.2-cp38-none-win32.whl", hash = "sha256:11be6e7afed29e5a5628f0aa6214e34bc194da73f558dc69fc7d56e07037422a"},
-    {file = "safetensors-0.4.2-cp38-none-win_amd64.whl", hash = "sha256:2f7a6e5d29bd2cc340cffaa391fa437b1be9d21a2bd8b8724d2875d13a6ef2a9"},
-    {file = "safetensors-0.4.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a5a921b4fe6925f9942adff3ebae8c16e0487908c54586a5a42f35b59fd69794"},
-    {file = "safetensors-0.4.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b691727228c28f2d82d8a92b2bc26e7a1f129ee40b2f2a3185b5974e038ed47c"},
-    {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:91ca1056decc4e981248786e87b2a202d4841ee5f99d433f1adf3d44d4bcfa0e"},
-    {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:55969fd2e6fdb38dc221b0ab380668c21b0efa12a7562db9924759faa3c51757"},
-    {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ae429bfaecc10ab5fe78c93009b3d1656c1581da560041e700eadb497dbe7a4"},
-    {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4ff88f194fe4ac50b463a4a6f0c03af9ad72eb5d24ec6d6730af59522e37fedb"},
-    {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a80cb48d0a447f8dd18e61813efa7d3f8f8d52edf0f05806abc0c59b83431f57"},
-    {file = "safetensors-0.4.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b286fb7adfee70a4189898ac2342b8a67d5f493e6b21b0af89ca8eac1b967cbf"},
-    {file = "safetensors-0.4.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0ceeff9ddbab4f78738489eb6682867ae946178776f33699737b2129b5394dc1"},
-    {file = "safetensors-0.4.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a26fae748a7488cb3aac381eddfa818c42052c87b5e689fb4c6e82ed58cec209"},
-    {file = "safetensors-0.4.2-cp39-none-win32.whl", hash = "sha256:039a42ab33c9d68b39706fd38f1922ace26866eff246bf20271edb619f5f848b"},
-    {file = "safetensors-0.4.2-cp39-none-win_amd64.whl", hash = "sha256:b3a3e1f5b85859e398773f064943b62a4059f225008a2a8ee6add1edcf77cacf"},
-    {file = "safetensors-0.4.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:4e70d442ad17e8b153ef9095bf48ea64f15a66bf26dc2b6ca94660c154edbc24"},
-    {file = "safetensors-0.4.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b90f1d9809caf4ff395951b4703295a68d12907f6945bbc3129e934ff8ae46f6"},
-    {file = "safetensors-0.4.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c7ac9ad3728838006598e296b3ae9f27d80b489effd4685b92d97b3fc4c98f6"},
-    {file = "safetensors-0.4.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de5730d77e6ff7f4c7039e20913661ad0ea2f86c09e71c039e73dfdd1f394f08"},
-    {file = "safetensors-0.4.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:44feb8cb156d6803dcd19fc6b81b27235f29b877660605a6ac35e1da7d64f0e4"},
-    {file = "safetensors-0.4.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:523a241c33e7c827ab9a3a23760d75c7d062f43dfe55b6b019409f89b0fb52d1"},
-    {file = "safetensors-0.4.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:fb18300e8eb74291225214f26c9a8ae2110fd61a6c9b5a2ff4c4e0eb1bb9a998"},
-    {file = "safetensors-0.4.2-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fe5437ff9fb116e44f2ab558981249ae63f978392b4576e62fcfe167d353edbc"},
-    {file = "safetensors-0.4.2-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9304a0934ced5a5d272f39de36291dc141dfc152d277f03fb4d65f2fb2ffa7c"},
-    {file = "safetensors-0.4.2-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:160ba1b1e11cf874602c233ab80a14f588571d09556cbc3586900121d622b5ed"},
-    {file = "safetensors-0.4.2-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04fcd6fcf7d9c13c7e5dc7e08de5e492ee4daa8f4ad74b4d8299d3eb0224292f"},
-    {file = "safetensors-0.4.2-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:906d14c4a677d35834fb0f3a5455ef8305e1bba10a5e0f2e0f357b3d1ad989f2"},
-    {file = "safetensors-0.4.2-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:df3fcdec0cd543084610d1f09c65cdb10fb3079f79bceddc092b0d187c6a265b"},
-    {file = "safetensors-0.4.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5ca76f13fb1cef242ea3ad2cb37388e7d005994f42af8b44bee56ba48b2d45ce"},
-    {file = "safetensors-0.4.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:278a1a3414c020785decdcd741c578725721274d2f9f787fcc930882e83b89cc"},
-    {file = "safetensors-0.4.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05b5a461cc68ecd42d9d546e5e1268a39d8ede7934a68d1ce17c3c659cb829d6"},
-    {file = "safetensors-0.4.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c2341411412a41671d25e26bed59ec121e46bf4fadb8132895e610411c4b9681"},
-    {file = "safetensors-0.4.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3497ac3895acf17c5f98197f1fa4769f09c5e7ede07fcb102f1c201e663e052c"},
-    {file = "safetensors-0.4.2-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:01b5e71d3754d2201294f1eb7a6d59cce3a5702ff96d83d226571b2ca2183837"},
-    {file = "safetensors-0.4.2-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:3627dbd1ea488dd8046a0491de5087f3c0d641e7acc80c0189a33c69398f1cd1"},
-    {file = "safetensors-0.4.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9d56f0ef53afad26ec54ceede78a43e9a23a076dadbbda7b44d304c591abf4c1"},
-    {file = "safetensors-0.4.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b259ca73d42daf658a1bda463f1f83885ae4d93a60869be80d7f7dfcc9d8bbb5"},
-    {file = "safetensors-0.4.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ebc3cd401e4eb54e7c0a70346be565e81942d9a41fafd5f4bf7ab3a55d10378"},
-    {file = "safetensors-0.4.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5bc384a0309b706aa0425c93abb0390508a61bf029ce99c7d9df4220f25871a5"},
-    {file = "safetensors-0.4.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:af2d8f7235d8a08fbccfb8394387890e7fa38942b349a94e6eff13c52ac98087"},
-    {file = "safetensors-0.4.2-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:0911315bbcc5289087d063c2c2c7ccd711ea97a7e557a7bce005ac2cf80146aa"},
-    {file = "safetensors-0.4.2-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:1efe31673be91832d73439a2af426743e1395fc9ef7b081914e9e1d567bd7b5f"},
-    {file = "safetensors-0.4.2.tar.gz", hash = "sha256:acc85dcb09ec5e8aa787f588d7ad4d55c103f31e4ff060e17d92cc0e8b8cac73"},
+    {file = "safetensors-0.4.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:dcf5705cab159ce0130cd56057f5f3425023c407e170bca60b4868048bae64fd"},
+    {file = "safetensors-0.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:bb4f8c5d0358a31e9a08daeebb68f5e161cdd4018855426d3f0c23bb51087055"},
+    {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:70a5319ef409e7f88686a46607cbc3c428271069d8b770076feaf913664a07ac"},
+    {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fb9c65bd82f9ef3ce4970dc19ee86be5f6f93d032159acf35e663c6bea02b237"},
+    {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:edb5698a7bc282089f64c96c477846950358a46ede85a1c040e0230344fdde10"},
+    {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:efcc860be094b8d19ac61b452ec635c7acb9afa77beb218b1d7784c6d41fe8ad"},
+    {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d88b33980222085dd6001ae2cad87c6068e0991d4f5ccf44975d216db3b57376"},
+    {file = "safetensors-0.4.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5fc6775529fb9f0ce2266edd3e5d3f10aab068e49f765e11f6f2a63b5367021d"},
+    {file = "safetensors-0.4.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9c6ad011c1b4e3acff058d6b090f1da8e55a332fbf84695cf3100c649cc452d1"},
+    {file = "safetensors-0.4.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8c496c5401c1b9c46d41a7688e8ff5b0310a3b9bae31ce0f0ae870e1ea2b8caf"},
+    {file = "safetensors-0.4.3-cp310-none-win32.whl", hash = "sha256:38e2a8666178224a51cca61d3cb4c88704f696eac8f72a49a598a93bbd8a4af9"},
+    {file = "safetensors-0.4.3-cp310-none-win_amd64.whl", hash = "sha256:393e6e391467d1b2b829c77e47d726f3b9b93630e6a045b1d1fca67dc78bf632"},
+    {file = "safetensors-0.4.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:22f3b5d65e440cec0de8edaa672efa888030802e11c09b3d6203bff60ebff05a"},
+    {file = "safetensors-0.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c4fa560ebd4522adddb71dcd25d09bf211b5634003f015a4b815b7647d62ebe"},
+    {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e9afd5358719f1b2cf425fad638fc3c887997d6782da317096877e5b15b2ce93"},
+    {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d8c5093206ef4b198600ae484230402af6713dab1bd5b8e231905d754022bec7"},
+    {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e0b2104df1579d6ba9052c0ae0e3137c9698b2d85b0645507e6fd1813b70931a"},
+    {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8cf18888606dad030455d18f6c381720e57fc6a4170ee1966adb7ebc98d4d6a3"},
+    {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0bf4f9d6323d9f86eef5567eabd88f070691cf031d4c0df27a40d3b4aaee755b"},
+    {file = "safetensors-0.4.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:585c9ae13a205807b63bef8a37994f30c917ff800ab8a1ca9c9b5d73024f97ee"},
+    {file = "safetensors-0.4.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:faefeb3b81bdfb4e5a55b9bbdf3d8d8753f65506e1d67d03f5c851a6c87150e9"},
+    {file = "safetensors-0.4.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:befdf0167ad626f22f6aac6163477fcefa342224a22f11fdd05abb3995c1783c"},
+    {file = "safetensors-0.4.3-cp311-none-win32.whl", hash = "sha256:a7cef55929dcbef24af3eb40bedec35d82c3c2fa46338bb13ecf3c5720af8a61"},
+    {file = "safetensors-0.4.3-cp311-none-win_amd64.whl", hash = "sha256:840b7ac0eff5633e1d053cc9db12fdf56b566e9403b4950b2dc85393d9b88d67"},
+    {file = "safetensors-0.4.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:22d21760dc6ebae42e9c058d75aa9907d9f35e38f896e3c69ba0e7b213033856"},
+    {file = "safetensors-0.4.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d22c1a10dff3f64d0d68abb8298a3fd88ccff79f408a3e15b3e7f637ef5c980"},
+    {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1648568667f820b8c48317c7006221dc40aced1869908c187f493838a1362bc"},
+    {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:446e9fe52c051aeab12aac63d1017e0f68a02a92a027b901c4f8e931b24e5397"},
+    {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fef5d70683643618244a4f5221053567ca3e77c2531e42ad48ae05fae909f542"},
+    {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a1f4430cc0c9d6afa01214a4b3919d0a029637df8e09675ceef1ca3f0dfa0df"},
+    {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d603846a8585b9432a0fd415db1d4c57c0f860eb4aea21f92559ff9902bae4d"},
+    {file = "safetensors-0.4.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a844cdb5d7cbc22f5f16c7e2a0271170750763c4db08381b7f696dbd2c78a361"},
+    {file = "safetensors-0.4.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:88887f69f7a00cf02b954cdc3034ffb383b2303bc0ab481d4716e2da51ddc10e"},
+    {file = "safetensors-0.4.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ee463219d9ec6c2be1d331ab13a8e0cd50d2f32240a81d498266d77d07b7e71e"},
+    {file = "safetensors-0.4.3-cp312-none-win32.whl", hash = "sha256:d0dd4a1db09db2dba0f94d15addc7e7cd3a7b0d393aa4c7518c39ae7374623c3"},
+    {file = "safetensors-0.4.3-cp312-none-win_amd64.whl", hash = "sha256:d14d30c25897b2bf19b6fb5ff7e26cc40006ad53fd4a88244fdf26517d852dd7"},
+    {file = "safetensors-0.4.3-cp37-cp37m-macosx_10_12_x86_64.whl", hash = "sha256:d1456f814655b224d4bf6e7915c51ce74e389b413be791203092b7ff78c936dd"},
+    {file = "safetensors-0.4.3-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:455d538aa1aae4a8b279344a08136d3f16334247907b18a5c3c7fa88ef0d3c46"},
+    {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cf476bca34e1340ee3294ef13e2c625833f83d096cfdf69a5342475602004f95"},
+    {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:02ef3a24face643456020536591fbd3c717c5abaa2737ec428ccbbc86dffa7a4"},
+    {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7de32d0d34b6623bb56ca278f90db081f85fb9c5d327e3c18fd23ac64f465768"},
+    {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a0deb16a1d3ea90c244ceb42d2c6c276059616be21a19ac7101aa97da448faf"},
+    {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c59d51f182c729f47e841510b70b967b0752039f79f1de23bcdd86462a9b09ee"},
+    {file = "safetensors-0.4.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1f598b713cc1a4eb31d3b3203557ac308acf21c8f41104cdd74bf640c6e538e3"},
+    {file = "safetensors-0.4.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:5757e4688f20df083e233b47de43845d1adb7e17b6cf7da5f8444416fc53828d"},
+    {file = "safetensors-0.4.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:fe746d03ed8d193674a26105e4f0fe6c726f5bb602ffc695b409eaf02f04763d"},
+    {file = "safetensors-0.4.3-cp37-none-win32.whl", hash = "sha256:0d5ffc6a80f715c30af253e0e288ad1cd97a3d0086c9c87995e5093ebc075e50"},
+    {file = "safetensors-0.4.3-cp37-none-win_amd64.whl", hash = "sha256:a11c374eb63a9c16c5ed146457241182f310902bd2a9c18255781bb832b6748b"},
+    {file = "safetensors-0.4.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:b1e31be7945f66be23f4ec1682bb47faa3df34cb89fc68527de6554d3c4258a4"},
+    {file = "safetensors-0.4.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:03a4447c784917c9bf01d8f2ac5080bc15c41692202cd5f406afba16629e84d6"},
+    {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d244bcafeb1bc06d47cfee71727e775bca88a8efda77a13e7306aae3813fa7e4"},
+    {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53c4879b9c6bd7cd25d114ee0ef95420e2812e676314300624594940a8d6a91f"},
+    {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:74707624b81f1b7f2b93f5619d4a9f00934d5948005a03f2c1845ffbfff42212"},
+    {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d52c958dc210265157573f81d34adf54e255bc2b59ded6218500c9b15a750eb"},
+    {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f9568f380f513a60139971169c4a358b8731509cc19112369902eddb33faa4d"},
+    {file = "safetensors-0.4.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0d9cd8e1560dfc514b6d7859247dc6a86ad2f83151a62c577428d5102d872721"},
+    {file = "safetensors-0.4.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:89f9f17b0dacb913ed87d57afbc8aad85ea42c1085bd5de2f20d83d13e9fc4b2"},
+    {file = "safetensors-0.4.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1139eb436fd201c133d03c81209d39ac57e129f5e74e34bb9ab60f8d9b726270"},
+    {file = "safetensors-0.4.3-cp38-none-win32.whl", hash = "sha256:d9c289f140a9ae4853fc2236a2ffc9a9f2d5eae0cb673167e0f1b8c18c0961ac"},
+    {file = "safetensors-0.4.3-cp38-none-win_amd64.whl", hash = "sha256:622afd28968ef3e9786562d352659a37de4481a4070f4ebac883f98c5836563e"},
+    {file = "safetensors-0.4.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8651c7299cbd8b4161a36cd6a322fa07d39cd23535b144d02f1c1972d0c62f3c"},
+    {file = "safetensors-0.4.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e375d975159ac534c7161269de24ddcd490df2157b55c1a6eeace6cbb56903f0"},
+    {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:084fc436e317f83f7071fc6a62ca1c513b2103db325cd09952914b50f51cf78f"},
+    {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:41a727a7f5e6ad9f1db6951adee21bbdadc632363d79dc434876369a17de6ad6"},
+    {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e7dbbde64b6c534548696808a0e01276d28ea5773bc9a2dfb97a88cd3dffe3df"},
+    {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bbae3b4b9d997971431c346edbfe6e41e98424a097860ee872721e176040a893"},
+    {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01e4b22e3284cd866edeabe4f4d896229495da457229408d2e1e4810c5187121"},
+    {file = "safetensors-0.4.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dd37306546b58d3043eb044c8103a02792cc024b51d1dd16bd3dd1f334cb3ed"},
+    {file = "safetensors-0.4.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d8815b5e1dac85fc534a97fd339e12404db557878c090f90442247e87c8aeaea"},
+    {file = "safetensors-0.4.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e011cc162503c19f4b1fd63dfcddf73739c7a243a17dac09b78e57a00983ab35"},
+    {file = "safetensors-0.4.3-cp39-none-win32.whl", hash = "sha256:01feb3089e5932d7e662eda77c3ecc389f97c0883c4a12b5cfdc32b589a811c3"},
+    {file = "safetensors-0.4.3-cp39-none-win_amd64.whl", hash = "sha256:3f9cdca09052f585e62328c1c2923c70f46814715c795be65f0b93f57ec98a02"},
+    {file = "safetensors-0.4.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:1b89381517891a7bb7d1405d828b2bf5d75528299f8231e9346b8eba092227f9"},
+    {file = "safetensors-0.4.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:cd6fff9e56df398abc5866b19a32124815b656613c1c5ec0f9350906fd798aac"},
+    {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:840caf38d86aa7014fe37ade5d0d84e23dcfbc798b8078015831996ecbc206a3"},
+    {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9650713b2cfa9537a2baf7dd9fee458b24a0aaaa6cafcea8bdd5fb2b8efdc34"},
+    {file = "safetensors-0.4.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e4119532cd10dba04b423e0f86aecb96cfa5a602238c0aa012f70c3a40c44b50"},
+    {file = "safetensors-0.4.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:e066e8861eef6387b7c772344d1fe1f9a72800e04ee9a54239d460c400c72aab"},
+    {file = "safetensors-0.4.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:90964917f5b0fa0fa07e9a051fbef100250c04d150b7026ccbf87a34a54012e0"},
+    {file = "safetensors-0.4.3-pp37-pypy37_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c41e1893d1206aa7054029681778d9a58b3529d4c807002c156d58426c225173"},
+    {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae7613a119a71a497d012ccc83775c308b9c1dab454806291427f84397d852fd"},
+    {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f9bac020faba7f5dc481e881b14b6425265feabb5bfc552551d21189c0eddc3"},
+    {file = "safetensors-0.4.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:420a98f593ff9930f5822560d14c395ccbc57342ddff3b463bc0b3d6b1951550"},
+    {file = "safetensors-0.4.3-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:f5e6883af9a68c0028f70a4c19d5a6ab6238a379be36ad300a22318316c00cb0"},
+    {file = "safetensors-0.4.3-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:cdd0a3b5da66e7f377474599814dbf5cbf135ff059cc73694de129b58a5e8a2c"},
+    {file = "safetensors-0.4.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9bfb92f82574d9e58401d79c70c716985dc049b635fef6eecbb024c79b2c46ad"},
+    {file = "safetensors-0.4.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:3615a96dd2dcc30eb66d82bc76cda2565f4f7bfa89fcb0e31ba3cea8a1a9ecbb"},
+    {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:868ad1b6fc41209ab6bd12f63923e8baeb1a086814cb2e81a65ed3d497e0cf8f"},
+    {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b7ffba80aa49bd09195145a7fd233a7781173b422eeb995096f2b30591639517"},
+    {file = "safetensors-0.4.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0acbe31340ab150423347e5b9cc595867d814244ac14218932a5cf1dd38eb39"},
+    {file = "safetensors-0.4.3-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:19bbdf95de2cf64f25cd614c5236c8b06eb2cfa47cbf64311f4b5d80224623a3"},
+    {file = "safetensors-0.4.3-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b852e47eb08475c2c1bd8131207b405793bfc20d6f45aff893d3baaad449ed14"},
+    {file = "safetensors-0.4.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5d07cbca5b99babb692d76d8151bec46f461f8ad8daafbfd96b2fca40cadae65"},
+    {file = "safetensors-0.4.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1ab6527a20586d94291c96e00a668fa03f86189b8a9defa2cdd34a1a01acc7d5"},
+    {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:02318f01e332cc23ffb4f6716e05a492c5f18b1d13e343c49265149396284a44"},
+    {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec4b52ce9a396260eb9731eb6aea41a7320de22ed73a1042c2230af0212758ce"},
+    {file = "safetensors-0.4.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:018b691383026a2436a22b648873ed11444a364324e7088b99cd2503dd828400"},
+    {file = "safetensors-0.4.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:309b10dbcab63269ecbf0e2ca10ce59223bb756ca5d431ce9c9eeabd446569da"},
+    {file = "safetensors-0.4.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:b277482120df46e27a58082df06a15aebda4481e30a1c21eefd0921ae7e03f65"},
+    {file = "safetensors-0.4.3.tar.gz", hash = "sha256:2f85fc50c4e07a21e95c24e07460fe6f7e2859d0ce88092838352b798ce711c2"},
 ]
 
 [package.extras]
@@ -2212,9 +2786,51 @@ paddlepaddle = ["paddlepaddle (>=2.4.1)", "safetensors[numpy]"]
 pinned-tf = ["safetensors[numpy]", "tensorflow (==2.11.0)"]
 quality = ["black (==22.3)", "click (==8.0.4)", "flake8 (>=3.8.3)", "isort (>=5.5.4)"]
 tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"]
-testing = ["h5py (>=3.7.0)", "huggingface_hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools_rust (>=1.5.2)"]
+testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools-rust (>=1.5.2)"]
 torch = ["safetensors[numpy]", "torch (>=1.10)"]
 
+[[package]]
+name = "scipy"
+version = "1.13.0"
+description = "Fundamental algorithms for scientific computing in Python"
+optional = true
+python-versions = ">=3.9"
+files = [
+    {file = "scipy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba419578ab343a4e0a77c0ef82f088238a93eef141b2b8017e46149776dfad4d"},
+    {file = "scipy-1.13.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:22789b56a999265431c417d462e5b7f2b487e831ca7bef5edeb56efe4c93f86e"},
+    {file = "scipy-1.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05f1432ba070e90d42d7fd836462c50bf98bd08bed0aa616c359eed8a04e3922"},
+    {file = "scipy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8434f6f3fa49f631fae84afee424e2483289dfc30a47755b4b4e6b07b2633a4"},
+    {file = "scipy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:dcbb9ea49b0167de4167c40eeee6e167caeef11effb0670b554d10b1e693a8b9"},
+    {file = "scipy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:1d2f7bb14c178f8b13ebae93f67e42b0a6b0fc50eba1cd8021c9b6e08e8fb1cd"},
+    {file = "scipy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fbcf8abaf5aa2dc8d6400566c1a727aed338b5fe880cde64907596a89d576fa"},
+    {file = "scipy-1.13.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:5e4a756355522eb60fcd61f8372ac2549073c8788f6114449b37e9e8104f15a5"},
+    {file = "scipy-1.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5acd8e1dbd8dbe38d0004b1497019b2dbbc3d70691e65d69615f8a7292865d7"},
+    {file = "scipy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ff7dad5d24a8045d836671e082a490848e8639cabb3dbdacb29f943a678683d"},
+    {file = "scipy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4dca18c3ffee287ddd3bc8f1dabaf45f5305c5afc9f8ab9cbfab855e70b2df5c"},
+    {file = "scipy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:a2f471de4d01200718b2b8927f7d76b5d9bde18047ea0fa8bd15c5ba3f26a1d6"},
+    {file = "scipy-1.13.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d0de696f589681c2802f9090fff730c218f7c51ff49bf252b6a97ec4a5d19e8b"},
+    {file = "scipy-1.13.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:b2a3ff461ec4756b7e8e42e1c681077349a038f0686132d623fa404c0bee2551"},
+    {file = "scipy-1.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6bf9fe63e7a4bf01d3645b13ff2aa6dea023d38993f42aaac81a18b1bda7a82a"},
+    {file = "scipy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1e7626dfd91cdea5714f343ce1176b6c4745155d234f1033584154f60ef1ff42"},
+    {file = "scipy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:109d391d720fcebf2fbe008621952b08e52907cf4c8c7efc7376822151820820"},
+    {file = "scipy-1.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:8930ae3ea371d6b91c203b1032b9600d69c568e537b7988a3073dfe4d4774f21"},
+    {file = "scipy-1.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5407708195cb38d70fd2d6bb04b1b9dd5c92297d86e9f9daae1576bd9e06f602"},
+    {file = "scipy-1.13.0-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:ac38c4c92951ac0f729c4c48c9e13eb3675d9986cc0c83943784d7390d540c78"},
+    {file = "scipy-1.13.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09c74543c4fbeb67af6ce457f6a6a28e5d3739a87f62412e4a16e46f164f0ae5"},
+    {file = "scipy-1.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28e286bf9ac422d6beb559bc61312c348ca9b0f0dae0d7c5afde7f722d6ea13d"},
+    {file = "scipy-1.13.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:33fde20efc380bd23a78a4d26d59fc8704e9b5fd9b08841693eb46716ba13d86"},
+    {file = "scipy-1.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:45c08bec71d3546d606989ba6e7daa6f0992918171e2a6f7fbedfa7361c2de1e"},
+    {file = "scipy-1.13.0.tar.gz", hash = "sha256:58569af537ea29d3f78e5abd18398459f195546bb3be23d16677fb26616cc11e"},
+]
+
+[package.dependencies]
+numpy = ">=1.22.4,<2.3"
+
+[package.extras]
+dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy", "pycodestyle", "pydevtool", "rich-click", "ruff", "types-psutil", "typing_extensions"]
+doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.12.0)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0)", "sphinx-design (>=0.4.0)"]
+test = ["array-api-strict", "asv", "gmpy2", "hypothesis (>=6.30)", "mpmath", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
+
 [[package]]
 name = "sentencepiece"
 version = "0.1.99"
@@ -2271,19 +2887,19 @@ files = [
 
 [[package]]
 name = "setuptools"
-version = "69.0.2"
+version = "69.5.1"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "setuptools-69.0.2-py3-none-any.whl", hash = "sha256:1e8fdff6797d3865f37397be788a4e3cba233608e9b509382a2777d25ebde7f2"},
-    {file = "setuptools-69.0.2.tar.gz", hash = "sha256:735896e78a4742605974de002ac60562d286fa8051a7e2299445e8e8fbb01aa6"},
+    {file = "setuptools-69.5.1-py3-none-any.whl", hash = "sha256:c636ac361bc47580504644275c9ad802c50415c7522212252c033bd15f301f32"},
+    {file = "setuptools-69.5.1.tar.gz", hash = "sha256:6c1fccdac05a97e598fb0ae3bbed5904ccb317337a51139dcd51453611bbb987"},
 ]
 
 [package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
-testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
-testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"]
+testing = ["build[virtualenv]", "filelock (>=3.4.0)", "importlib-metadata", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "mypy (==1.9)", "packaging (>=23.2)", "pip (>=19.1)", "pytest (>=6,!=8.1.1)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-home (>=0.5)", "pytest-mypy", "pytest-perf", "pytest-ruff (>=0.2.1)", "pytest-timeout", "pytest-xdist (>=3)", "tomli", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"]
+testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.2)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"]
 
 [[package]]
 name = "six"
@@ -2310,6 +2926,19 @@ files = [
 [package.dependencies]
 mpmath = ">=0.19"
 
+[[package]]
+name = "tbb"
+version = "2021.12.0"
+description = "Intel® oneAPI Threading Building Blocks (oneTBB)"
+optional = false
+python-versions = "*"
+files = [
+    {file = "tbb-2021.12.0-py2.py3-none-manylinux1_i686.whl", hash = "sha256:f2cc9a7f8ababaa506cbff796ce97c3bf91062ba521e15054394f773375d81d8"},
+    {file = "tbb-2021.12.0-py2.py3-none-manylinux1_x86_64.whl", hash = "sha256:a925e9a7c77d3a46ae31c34b0bb7f801c4118e857d137b68f68a8e458fcf2bd7"},
+    {file = "tbb-2021.12.0-py3-none-win32.whl", hash = "sha256:b1725b30c174048edc8be70bd43bb95473f396ce895d91151a474d0fa9f450a8"},
+    {file = "tbb-2021.12.0-py3-none-win_amd64.whl", hash = "sha256:fc2772d850229f2f3df85f1109c4844c495a2db7433d38200959ee9265b34789"},
+]
+
 [[package]]
 name = "tokenizers"
 version = "0.15.2"
@@ -2450,37 +3079,38 @@ files = [
 
 [[package]]
 name = "torch"
-version = "2.1.1"
+version = "2.3.0"
 description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "torch-2.1.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:5ebc43f5355a9b7be813392b3fb0133991f0380f6f0fcc8218d5468dc45d1071"},
-    {file = "torch-2.1.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:84fefd63356416c0cd20578637ccdbb82164993400ed17b57c951dd6376dcee8"},
-    {file = "torch-2.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:0a7a9da0c324409bcb5a7bdad1b4e94e936d21c2590aaa7ac2f63968da8c62f7"},
-    {file = "torch-2.1.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:1e1e5faddd43a8f2c0e0e22beacd1e235a2e447794d807483c94a9e31b54a758"},
-    {file = "torch-2.1.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:e76bf3c5c354874f1da465c852a2fb60ee6cbce306e935337885760f080f9baa"},
-    {file = "torch-2.1.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:98fea993639b0bb432dfceb7b538f07c0f1c33386d63f635219f49254968c80f"},
-    {file = "torch-2.1.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:61b51b33c61737c287058b0c3061e6a9d3c363863e4a094f804bc486888a188a"},
-    {file = "torch-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:1d70920da827e2276bf07f7ec46958621cad18d228c97da8f9c19638474dbd52"},
-    {file = "torch-2.1.1-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:a70593806f1d7e6b53657d96810518da0f88ef2608c98a402955765b8c79d52c"},
-    {file = "torch-2.1.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:e312f7e82e49565f7667b0bbf9559ab0c597063d93044740781c02acd5a87978"},
-    {file = "torch-2.1.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:1e3cbecfa5a7314d828f4a37b0c286714dc9aa2e69beb7a22f7aca76567ed9f4"},
-    {file = "torch-2.1.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:9ca0fcbf3d5ba644d6a8572c83a9abbdf5f7ff575bc38529ef6c185a3a71bde9"},
-    {file = "torch-2.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:2dc9f312fc1fa0d61a565a0292ad73119d4b74c9f8b5031b55f8b4722abca079"},
-    {file = "torch-2.1.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:d56b032176458e2af4709627bbd2c20fe2917eff8cd087a7fe313acccf5ce2f1"},
-    {file = "torch-2.1.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:29e3b90a8c281f6660804a939d1f4218604c80162e521e1e6d8c8557325902a0"},
-    {file = "torch-2.1.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:bd95cee8511584b67ddc0ba465c3f1edeb5708d833ee02af1206b4486f1d9096"},
-    {file = "torch-2.1.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:b31230bd058424e56dba7f899280dbc6ac8b9948e43902e0c84a44666b1ec151"},
-    {file = "torch-2.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:403f1095e665e4f35971b43797a920725b8b205723aa68254a4050c6beca29b6"},
-    {file = "torch-2.1.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:715b50d8c1de5da5524a68287eb000f73e026e74d5f6b12bc450ef6995fcf5f9"},
-    {file = "torch-2.1.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:db67e8725c76f4c7f4f02e7551bb16e81ba1a1912867bc35d7bb96d2be8c78b4"},
+    {file = "torch-2.3.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:d8ea5a465dbfd8501f33c937d1f693176c9aef9d1c1b0ca1d44ed7b0a18c52ac"},
+    {file = "torch-2.3.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:09c81c5859a5b819956c6925a405ef1cdda393c9d8a01ce3851453f699d3358c"},
+    {file = "torch-2.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:1bf023aa20902586f614f7682fedfa463e773e26c58820b74158a72470259459"},
+    {file = "torch-2.3.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:758ef938de87a2653bba74b91f703458c15569f1562bf4b6c63c62d9c5a0c1f5"},
+    {file = "torch-2.3.0-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:493d54ee2f9df100b5ce1d18c96dbb8d14908721f76351e908c9d2622773a788"},
+    {file = "torch-2.3.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:bce43af735c3da16cc14c7de2be7ad038e2fbf75654c2e274e575c6c05772ace"},
+    {file = "torch-2.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:729804e97b7cf19ae9ab4181f91f5e612af07956f35c8b2c8e9d9f3596a8e877"},
+    {file = "torch-2.3.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:d24e328226d8e2af7cf80fcb1d2f1d108e0de32777fab4aaa2b37b9765d8be73"},
+    {file = "torch-2.3.0-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:b0de2bdc0486ea7b14fc47ff805172df44e421a7318b7c4d92ef589a75d27410"},
+    {file = "torch-2.3.0-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:a306c87a3eead1ed47457822c01dfbd459fe2920f2d38cbdf90de18f23f72542"},
+    {file = "torch-2.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9b98bf1a3c8af2d4c41f0bf1433920900896c446d1ddc128290ff146d1eb4bd"},
+    {file = "torch-2.3.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:dca986214267b34065a79000cee54232e62b41dff1ec2cab9abc3fc8b3dee0ad"},
+    {file = "torch-2.3.0-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:20572f426965dd8a04e92a473d7e445fa579e09943cc0354f3e6fef6130ce061"},
+    {file = "torch-2.3.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:e65ba85ae292909cde0dde6369826d51165a3fc8823dc1854cd9432d7f79b932"},
+    {file = "torch-2.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:5515503a193781fd1b3f5c474e89c9dfa2faaa782b2795cc4a7ab7e67de923f6"},
+    {file = "torch-2.3.0-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:6ae9f64b09516baa4ef890af0672dc981c20b1f0d829ce115d4420a247e88fba"},
+    {file = "torch-2.3.0-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:cd0dc498b961ab19cb3f8dbf0c6c50e244f2f37dbfa05754ab44ea057c944ef9"},
+    {file = "torch-2.3.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:e05f836559251e4096f3786ee99f4a8cbe67bc7fbedba8ad5e799681e47c5e80"},
+    {file = "torch-2.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:4fb27b35dbb32303c2927da86e27b54a92209ddfb7234afb1949ea2b3effffea"},
+    {file = "torch-2.3.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:760f8bedff506ce9e6e103498f9b1e9e15809e008368594c3a66bf74a8a51380"},
 ]
 
 [package.dependencies]
 filelock = "*"
 fsspec = "*"
 jinja2 = "*"
+mkl = {version = ">=2021.1.1,<=2021.4.0", markers = "platform_system == \"Windows\""}
 networkx = "*"
 nvidia-cublas-cu12 = {version = "12.1.3.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
 nvidia-cuda-cupti-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
@@ -2491,25 +3121,25 @@ nvidia-cufft-cu12 = {version = "11.0.2.54", markers = "platform_system == \"Linu
 nvidia-curand-cu12 = {version = "10.3.2.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
 nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
 nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-nvidia-nccl-cu12 = {version = "2.18.1", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
+nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
 nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
 sympy = "*"
-triton = {version = "2.1.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
-typing-extensions = "*"
+triton = {version = "2.3.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""}
+typing-extensions = ">=4.8.0"
 
 [package.extras]
-dynamo = ["jinja2"]
 opt-einsum = ["opt-einsum (>=3.3)"]
+optree = ["optree (>=0.9.1)"]
 
 [[package]]
 name = "tqdm"
-version = "4.66.1"
+version = "4.66.2"
 description = "Fast, Extensible Progress Meter"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "tqdm-4.66.1-py3-none-any.whl", hash = "sha256:d302b3c5b53d47bce91fea46679d9c3c6508cf6332229aa1e7d8653723793386"},
-    {file = "tqdm-4.66.1.tar.gz", hash = "sha256:d88e651f9db8d8551a62556d3cff9e3034274ca5d66e93197cf2490e2dcb69c7"},
+    {file = "tqdm-4.66.2-py3-none-any.whl", hash = "sha256:1ee4f8a893eb9bef51c6e35730cebf234d5d0b6bd112b0271e10ed7c24a02bd9"},
+    {file = "tqdm-4.66.2.tar.gz", hash = "sha256:6cd52cdf0fef0e0f543299cfc96fec90d7b8a7e88745f411ec33eb44d5ed3531"},
 ]
 
 [package.dependencies]
@@ -2523,13 +3153,13 @@ telegram = ["requests"]
 
 [[package]]
 name = "transformers"
-version = "4.37.2"
+version = "4.38.2"
 description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow"
 optional = false
 python-versions = ">=3.8.0"
 files = [
-    {file = "transformers-4.37.2-py3-none-any.whl", hash = "sha256:595a8b12a1fcc4ad0ced49ce206c58e17be68c85d7aee3d7546d04a32c910d2e"},
-    {file = "transformers-4.37.2.tar.gz", hash = "sha256:f307082ae5d528b8480611a4879a4a11651012d0e9aaea3f6cf17219ffd95542"},
+    {file = "transformers-4.38.2-py3-none-any.whl", hash = "sha256:c4029cb9f01b3dd335e52f364c52d2b37c65b4c78e02e6a08b1919c5c928573e"},
+    {file = "transformers-4.38.2.tar.gz", hash = "sha256:c5fc7ad682b8a50a48b2a4c05d4ea2de5567adb1bdd00053619dbe5960857dd5"},
 ]
 
 [package.dependencies]
@@ -2548,16 +3178,16 @@ tqdm = ">=4.27"
 
 [package.extras]
 accelerate = ["accelerate (>=0.21.0)"]
-agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch (>=1.11,!=1.12.0)"]
-all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.11,!=1.12.0)", "torchaudio", "torchvision"]
+agents = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "datasets (!=2.5.0)", "diffusers", "opencv-python", "sentencepiece (>=0.1.91,!=0.1.92)", "torch"]
+all = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.19)", "torch", "torchaudio", "torchvision"]
 audio = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 codecarbon = ["codecarbon (==1.2.0)"]
 deepspeed = ["accelerate (>=0.21.0)", "deepspeed (>=0.9.3)"]
-deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.21.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
-dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.11,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
-dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.14,<0.19)", "urllib3 (<2.0.0)"]
-dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.11,!=1.12.0)", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
-docs = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.19)", "torch (>=1.11,!=1.12.0)", "torchaudio", "torchvision"]
+deepspeed-testing = ["GitPython (<3.1.19)", "accelerate (>=0.21.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "deepspeed (>=0.9.3)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "optuna", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "timeout-decorator"]
+dev = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "decord (==0.6.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "flax (>=0.4.1,<=0.7.0)", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.19)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+dev-tensorflow = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "isort (>=5.5.4)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "nltk", "onnxconverter-common", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "tensorboard", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timeout-decorator", "tokenizers (>=0.14,<0.19)", "urllib3 (<2.0.0)"]
+dev-torch = ["GitPython (<3.1.19)", "Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "beautifulsoup4", "codecarbon (==1.2.0)", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "fugashi (>=1.0)", "hf-doc-builder", "hf-doc-builder (>=0.3.0)", "ipadic (>=1.0.0,<2.0)", "isort (>=5.5.4)", "kenlm", "librosa", "nltk", "onnxruntime (>=1.4.0)", "onnxruntime-tools (>=1.4.2)", "optuna", "parameterized", "phonemizer", "protobuf", "psutil", "pyctcdecode (>=0.4.0)", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-timeout", "pytest-xdist", "ray[tune] (>=2.7.0)", "rhoknp (>=1.1.0,<1.3.1)", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "scikit-learn", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "sudachidict-core (>=20220729)", "sudachipy (>=0.6.6)", "tensorboard", "timeout-decorator", "timm", "tokenizers (>=0.14,<0.19)", "torch", "torchaudio", "torchvision", "unidic (>=1.0.2)", "unidic-lite (>=1.0.7)", "urllib3 (<2.0.0)"]
+docs = ["Pillow (>=10.0.1,<=15.0)", "accelerate (>=0.21.0)", "av (==9.2.0)", "codecarbon (==1.2.0)", "decord (==0.6.0)", "flax (>=0.4.1,<=0.7.0)", "hf-doc-builder", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "kenlm", "keras-nlp (>=0.3.1)", "librosa", "onnxconverter-common", "optax (>=0.0.8,<=0.1.4)", "optuna", "phonemizer", "protobuf", "pyctcdecode (>=0.4.0)", "ray[tune] (>=2.7.0)", "sentencepiece (>=0.1.91,!=0.1.92)", "sigopt", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx", "timm", "tokenizers (>=0.14,<0.19)", "torch", "torchaudio", "torchvision"]
 docs-specific = ["hf-doc-builder"]
 flax = ["flax (>=0.4.1,<=0.7.0)", "jax (>=0.4.1,<=0.4.13)", "jaxlib (>=0.4.1,<=0.4.13)", "optax (>=0.0.8,<=0.1.4)"]
 flax-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
@@ -2574,47 +3204,45 @@ ray = ["ray[tune] (>=2.7.0)"]
 retrieval = ["datasets (!=2.5.0)", "faiss-cpu"]
 sagemaker = ["sagemaker (>=2.31.0)"]
 sentencepiece = ["protobuf", "sentencepiece (>=0.1.91,!=0.1.92)"]
-serving = ["fastapi", "pydantic (<2)", "starlette", "uvicorn"]
+serving = ["fastapi", "pydantic", "starlette", "uvicorn"]
 sigopt = ["sigopt"]
 sklearn = ["scikit-learn"]
 speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
-testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf", "psutil", "pydantic (<2)", "pytest (>=7.2.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "tensorboard", "timeout-decorator"]
+testing = ["GitPython (<3.1.19)", "beautifulsoup4", "cookiecutter (==1.7.3)", "datasets (!=2.5.0)", "dill (<0.3.5)", "evaluate (>=0.2.0)", "faiss-cpu", "hf-doc-builder (>=0.3.0)", "nltk", "parameterized", "protobuf", "psutil", "pydantic", "pytest (>=7.2.0,<8.0.0)", "pytest-timeout", "pytest-xdist", "rjieba", "rouge-score (!=0.0.7,!=0.0.8,!=0.1,!=0.1.1)", "ruff (==0.1.5)", "sacrebleu (>=1.4.12,<2.0.0)", "sacremoses", "tensorboard", "timeout-decorator"]
 tf = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
 tf-cpu = ["keras-nlp (>=0.3.1)", "onnxconverter-common", "tensorflow-cpu (>=2.6,<2.16)", "tensorflow-text (<2.16)", "tf2onnx"]
 tf-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)"]
 timm = ["timm"]
 tokenizers = ["tokenizers (>=0.14,<0.19)"]
-torch = ["accelerate (>=0.21.0)", "torch (>=1.11,!=1.12.0)"]
+torch = ["accelerate (>=0.21.0)", "torch"]
 torch-speech = ["kenlm", "librosa", "phonemizer", "pyctcdecode (>=0.4.0)", "torchaudio"]
 torch-vision = ["Pillow (>=10.0.1,<=15.0)", "torchvision"]
-torchhub = ["filelock", "huggingface-hub (>=0.19.3,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.14,<0.19)", "torch (>=1.11,!=1.12.0)", "tqdm (>=4.27)"]
+torchhub = ["filelock", "huggingface-hub (>=0.19.3,<1.0)", "importlib-metadata", "numpy (>=1.17)", "packaging (>=20.0)", "protobuf", "regex (!=2019.12.17)", "requests", "sentencepiece (>=0.1.91,!=0.1.92)", "tokenizers (>=0.14,<0.19)", "torch", "tqdm (>=4.27)"]
 video = ["av (==9.2.0)", "decord (==0.6.0)"]
 vision = ["Pillow (>=10.0.1,<=15.0)"]
 
 [[package]]
 name = "triton"
-version = "2.1.0"
+version = "2.3.0"
 description = "A language and compiler for custom Deep Learning operations"
 optional = false
 python-versions = "*"
 files = [
-    {file = "triton-2.1.0-0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:66439923a30d5d48399b08a9eae10370f6c261a5ec864a64983bae63152d39d7"},
-    {file = "triton-2.1.0-0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:919b06453f0033ea52c13eaf7833de0e57db3178d23d4e04f9fc71c4f2c32bf8"},
-    {file = "triton-2.1.0-0-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ae4bb8a91de790e1866405211c4d618379781188f40d5c4c399766914e84cd94"},
-    {file = "triton-2.1.0-0-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39f6fb6bdccb3e98f3152e3fbea724f1aeae7d749412bbb1fa9c441d474eba26"},
-    {file = "triton-2.1.0-0-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21544e522c02005a626c8ad63d39bdff2f31d41069592919ef281e964ed26446"},
-    {file = "triton-2.1.0-0-pp37-pypy37_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:143582ca31dd89cd982bd3bf53666bab1c7527d41e185f9e3d8a3051ce1b663b"},
-    {file = "triton-2.1.0-0-pp38-pypy38_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82fc5aeeedf6e36be4e4530cbdcba81a09d65c18e02f52dc298696d45721f3bd"},
-    {file = "triton-2.1.0-0-pp39-pypy39_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:81a96d110a738ff63339fc892ded095b31bd0d205e3aace262af8400d40b6fa8"},
+    {file = "triton-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ce4b8ff70c48e47274c66f269cce8861cf1dc347ceeb7a67414ca151b1822d8"},
+    {file = "triton-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c3d9607f85103afdb279938fc1dd2a66e4f5999a58eb48a346bd42738f986dd"},
+    {file = "triton-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:218d742e67480d9581bafb73ed598416cc8a56f6316152e5562ee65e33de01c0"},
+    {file = "triton-2.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:381ec6b3dac06922d3e4099cfc943ef032893b25415de295e82b1a82b0359d2c"},
+    {file = "triton-2.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:038e06a09c06a164fef9c48de3af1e13a63dc1ba3c792871e61a8e79720ea440"},
+    {file = "triton-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d8f636e0341ac348899a47a057c3daea99ea7db31528a225a3ba4ded28ccc65"},
 ]
 
 [package.dependencies]
 filelock = "*"
 
 [package.extras]
-build = ["cmake (>=3.18)", "lit"]
-tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)"]
-tutorials = ["matplotlib", "pandas", "tabulate"]
+build = ["cmake (>=3.20)", "lit"]
+tests = ["autopep8", "flake8", "isort", "numpy", "pytest", "scipy (>=1.7.1)", "torch"]
+tutorials = ["matplotlib", "pandas", "tabulate", "torch"]
 
 [[package]]
 name = "typer"
@@ -2638,39 +3266,40 @@ test = ["black (>=22.3.0,<23.0.0)", "coverage (>=5.2,<6.0)", "isort (>=5.0.6,<6.
 
 [[package]]
 name = "typing-extensions"
-version = "4.8.0"
+version = "4.11.0"
 description = "Backported and Experimental Type Hints for Python 3.8+"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "typing_extensions-4.8.0-py3-none-any.whl", hash = "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0"},
-    {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"},
+    {file = "typing_extensions-4.11.0-py3-none-any.whl", hash = "sha256:c1f94d72897edaf4ce775bb7558d5b79d8126906a14ea5ed1635921406c0387a"},
+    {file = "typing_extensions-4.11.0.tar.gz", hash = "sha256:83f085bd5ca59c80295fc2a82ab5dac679cbe02b9f33f7d83af68e241bea51b0"},
 ]
 
 [[package]]
 name = "tzdata"
-version = "2023.3"
+version = "2024.1"
 description = "Provider of IANA time zone data"
 optional = false
 python-versions = ">=2"
 files = [
-    {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"},
-    {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"},
+    {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"},
+    {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"},
 ]
 
 [[package]]
 name = "urllib3"
-version = "2.1.0"
+version = "2.2.1"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "urllib3-2.1.0-py3-none-any.whl", hash = "sha256:55901e917a5896a349ff771be919f8bd99aff50b79fe58fec595eb37bbc56bb3"},
-    {file = "urllib3-2.1.0.tar.gz", hash = "sha256:df7aa8afb0148fa78488e7899b2c59b5f4ffcfa82e6c54ccb9dd37c1d7b52d54"},
+    {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"},
+    {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"},
 ]
 
 [package.extras]
 brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
+h2 = ["h2 (>=4,<5)"]
 socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
 zstd = ["zstandard (>=0.18.0)"]
 
@@ -2886,101 +3515,101 @@ files = [
 
 [[package]]
 name = "yarl"
-version = "1.9.3"
+version = "1.9.4"
 description = "Yet another URL library"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "yarl-1.9.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:32435d134414e01d937cd9d6cc56e8413a8d4741dea36af5840c7750f04d16ab"},
-    {file = "yarl-1.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9a5211de242754b5e612557bca701f39f8b1a9408dff73c6db623f22d20f470e"},
-    {file = "yarl-1.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:525cd69eff44833b01f8ef39aa33a9cc53a99ff7f9d76a6ef6a9fb758f54d0ff"},
-    {file = "yarl-1.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc94441bcf9cb8c59f51f23193316afefbf3ff858460cb47b5758bf66a14d130"},
-    {file = "yarl-1.9.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e36021db54b8a0475805acc1d6c4bca5d9f52c3825ad29ae2d398a9d530ddb88"},
-    {file = "yarl-1.9.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0f17d1df951336a02afc8270c03c0c6e60d1f9996fcbd43a4ce6be81de0bd9d"},
-    {file = "yarl-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5f3faeb8100a43adf3e7925d556801d14b5816a0ac9e75e22948e787feec642"},
-    {file = "yarl-1.9.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aed37db837ecb5962469fad448aaae0f0ee94ffce2062cf2eb9aed13328b5196"},
-    {file = "yarl-1.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:721ee3fc292f0d069a04016ef2c3a25595d48c5b8ddc6029be46f6158d129c92"},
-    {file = "yarl-1.9.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b8bc5b87a65a4e64bc83385c05145ea901b613d0d3a434d434b55511b6ab0067"},
-    {file = "yarl-1.9.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:dd952b9c64f3b21aedd09b8fe958e4931864dba69926d8a90c90d36ac4e28c9a"},
-    {file = "yarl-1.9.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:c405d482c320a88ab53dcbd98d6d6f32ada074f2d965d6e9bf2d823158fa97de"},
-    {file = "yarl-1.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9df9a0d4c5624790a0dea2e02e3b1b3c69aed14bcb8650e19606d9df3719e87d"},
-    {file = "yarl-1.9.3-cp310-cp310-win32.whl", hash = "sha256:d34c4f80956227f2686ddea5b3585e109c2733e2d4ef12eb1b8b4e84f09a2ab6"},
-    {file = "yarl-1.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:cf7a4e8de7f1092829caef66fd90eaf3710bc5efd322a816d5677b7664893c93"},
-    {file = "yarl-1.9.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d61a0ca95503867d4d627517bcfdc28a8468c3f1b0b06c626f30dd759d3999fd"},
-    {file = "yarl-1.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:73cc83f918b69110813a7d95024266072d987b903a623ecae673d1e71579d566"},
-    {file = "yarl-1.9.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d81657b23e0edb84b37167e98aefb04ae16cbc5352770057893bd222cdc6e45f"},
-    {file = "yarl-1.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26a1a8443091c7fbc17b84a0d9f38de34b8423b459fb853e6c8cdfab0eacf613"},
-    {file = "yarl-1.9.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fe34befb8c765b8ce562f0200afda3578f8abb159c76de3ab354c80b72244c41"},
-    {file = "yarl-1.9.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2c757f64afe53a422e45e3e399e1e3cf82b7a2f244796ce80d8ca53e16a49b9f"},
-    {file = "yarl-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72a57b41a0920b9a220125081c1e191b88a4cdec13bf9d0649e382a822705c65"},
-    {file = "yarl-1.9.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:632c7aeb99df718765adf58eacb9acb9cbc555e075da849c1378ef4d18bf536a"},
-    {file = "yarl-1.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b0b8c06afcf2bac5a50b37f64efbde978b7f9dc88842ce9729c020dc71fae4ce"},
-    {file = "yarl-1.9.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:1d93461e2cf76c4796355494f15ffcb50a3c198cc2d601ad8d6a96219a10c363"},
-    {file = "yarl-1.9.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:4003f380dac50328c85e85416aca6985536812c082387255c35292cb4b41707e"},
-    {file = "yarl-1.9.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4d6d74a97e898c1c2df80339aa423234ad9ea2052f66366cef1e80448798c13d"},
-    {file = "yarl-1.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b61e64b06c3640feab73fa4ff9cb64bd8182de52e5dc13038e01cfe674ebc321"},
-    {file = "yarl-1.9.3-cp311-cp311-win32.whl", hash = "sha256:29beac86f33d6c7ab1d79bd0213aa7aed2d2f555386856bb3056d5fdd9dab279"},
-    {file = "yarl-1.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:f7271d6bd8838c49ba8ae647fc06469137e1c161a7ef97d778b72904d9b68696"},
-    {file = "yarl-1.9.3-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:dd318e6b75ca80bff0b22b302f83a8ee41c62b8ac662ddb49f67ec97e799885d"},
-    {file = "yarl-1.9.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c4b1efb11a8acd13246ffb0bee888dd0e8eb057f8bf30112e3e21e421eb82d4a"},
-    {file = "yarl-1.9.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c6f034386e5550b5dc8ded90b5e2ff7db21f0f5c7de37b6efc5dac046eb19c10"},
-    {file = "yarl-1.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd49a908cb6d387fc26acee8b7d9fcc9bbf8e1aca890c0b2fdfd706057546080"},
-    {file = "yarl-1.9.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa4643635f26052401750bd54db911b6342eb1a9ac3e74f0f8b58a25d61dfe41"},
-    {file = "yarl-1.9.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e741bd48e6a417bdfbae02e088f60018286d6c141639359fb8df017a3b69415a"},
-    {file = "yarl-1.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7c86d0d0919952d05df880a1889a4f0aeb6868e98961c090e335671dea5c0361"},
-    {file = "yarl-1.9.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3d5434b34100b504aabae75f0622ebb85defffe7b64ad8f52b8b30ec6ef6e4b9"},
-    {file = "yarl-1.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:79e1df60f7c2b148722fb6cafebffe1acd95fd8b5fd77795f56247edaf326752"},
-    {file = "yarl-1.9.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:44e91a669c43f03964f672c5a234ae0d7a4d49c9b85d1baa93dec28afa28ffbd"},
-    {file = "yarl-1.9.3-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:3cfa4dbe17b2e6fca1414e9c3bcc216f6930cb18ea7646e7d0d52792ac196808"},
-    {file = "yarl-1.9.3-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:88d2c3cc4b2f46d1ba73d81c51ec0e486f59cc51165ea4f789677f91a303a9a7"},
-    {file = "yarl-1.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:cccdc02e46d2bd7cb5f38f8cc3d9db0d24951abd082b2f242c9e9f59c0ab2af3"},
-    {file = "yarl-1.9.3-cp312-cp312-win32.whl", hash = "sha256:96758e56dceb8a70f8a5cff1e452daaeff07d1cc9f11e9b0c951330f0a2396a7"},
-    {file = "yarl-1.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:c4472fe53ebf541113e533971bd8c32728debc4c6d8cc177f2bff31d011ec17e"},
-    {file = "yarl-1.9.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:126638ab961633f0940a06e1c9d59919003ef212a15869708dcb7305f91a6732"},
-    {file = "yarl-1.9.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c99ddaddb2fbe04953b84d1651149a0d85214780e4d0ee824e610ab549d98d92"},
-    {file = "yarl-1.9.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8dab30b21bd6fb17c3f4684868c7e6a9e8468078db00f599fb1c14e324b10fca"},
-    {file = "yarl-1.9.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:828235a2a169160ee73a2fcfb8a000709edf09d7511fccf203465c3d5acc59e4"},
-    {file = "yarl-1.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc391e3941045fd0987c77484b2799adffd08e4b6735c4ee5f054366a2e1551d"},
-    {file = "yarl-1.9.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:51382c72dd5377861b573bd55dcf680df54cea84147c8648b15ac507fbef984d"},
-    {file = "yarl-1.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:28a108cb92ce6cf867690a962372996ca332d8cda0210c5ad487fe996e76b8bb"},
-    {file = "yarl-1.9.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:8f18a7832ff85dfcd77871fe677b169b1bc60c021978c90c3bb14f727596e0ae"},
-    {file = "yarl-1.9.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:7eaf13af79950142ab2bbb8362f8d8d935be9aaf8df1df89c86c3231e4ff238a"},
-    {file = "yarl-1.9.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:66a6dbf6ca7d2db03cc61cafe1ee6be838ce0fbc97781881a22a58a7c5efef42"},
-    {file = "yarl-1.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:1a0a4f3aaa18580038cfa52a7183c8ffbbe7d727fe581300817efc1e96d1b0e9"},
-    {file = "yarl-1.9.3-cp37-cp37m-win32.whl", hash = "sha256:946db4511b2d815979d733ac6a961f47e20a29c297be0d55b6d4b77ee4b298f6"},
-    {file = "yarl-1.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2dad8166d41ebd1f76ce107cf6a31e39801aee3844a54a90af23278b072f1ccf"},
-    {file = "yarl-1.9.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:bb72d2a94481e7dc7a0c522673db288f31849800d6ce2435317376a345728225"},
-    {file = "yarl-1.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9a172c3d5447b7da1680a1a2d6ecdf6f87a319d21d52729f45ec938a7006d5d8"},
-    {file = "yarl-1.9.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2dc72e891672343b99db6d497024bf8b985537ad6c393359dc5227ef653b2f17"},
-    {file = "yarl-1.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8d51817cf4b8d545963ec65ff06c1b92e5765aa98831678d0e2240b6e9fd281"},
-    {file = "yarl-1.9.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:53ec65f7eee8655bebb1f6f1607760d123c3c115a324b443df4f916383482a67"},
-    {file = "yarl-1.9.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cfd77e8e5cafba3fb584e0f4b935a59216f352b73d4987be3af51f43a862c403"},
-    {file = "yarl-1.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e73db54c967eb75037c178a54445c5a4e7461b5203b27c45ef656a81787c0c1b"},
-    {file = "yarl-1.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09c19e5f4404574fcfb736efecf75844ffe8610606f3fccc35a1515b8b6712c4"},
-    {file = "yarl-1.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6280353940f7e5e2efaaabd686193e61351e966cc02f401761c4d87f48c89ea4"},
-    {file = "yarl-1.9.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:c25ec06e4241e162f5d1f57c370f4078797ade95c9208bd0c60f484834f09c96"},
-    {file = "yarl-1.9.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:7217234b10c64b52cc39a8d82550342ae2e45be34f5bff02b890b8c452eb48d7"},
-    {file = "yarl-1.9.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:4ce77d289f8d40905c054b63f29851ecbfd026ef4ba5c371a158cfe6f623663e"},
-    {file = "yarl-1.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5f74b015c99a5eac5ae589de27a1201418a5d9d460e89ccb3366015c6153e60a"},
-    {file = "yarl-1.9.3-cp38-cp38-win32.whl", hash = "sha256:8a2538806be846ea25e90c28786136932ec385c7ff3bc1148e45125984783dc6"},
-    {file = "yarl-1.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:6465d36381af057d0fab4e0f24ef0e80ba61f03fe43e6eeccbe0056e74aadc70"},
-    {file = "yarl-1.9.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:2f3c8822bc8fb4a347a192dd6a28a25d7f0ea3262e826d7d4ef9cc99cd06d07e"},
-    {file = "yarl-1.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b7831566595fe88ba17ea80e4b61c0eb599f84c85acaa14bf04dd90319a45b90"},
-    {file = "yarl-1.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ff34cb09a332832d1cf38acd0f604c068665192c6107a439a92abfd8acf90fe2"},
-    {file = "yarl-1.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe8080b4f25dfc44a86bedd14bc4f9d469dfc6456e6f3c5d9077e81a5fedfba7"},
-    {file = "yarl-1.9.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8535e111a064f3bdd94c0ed443105934d6f005adad68dd13ce50a488a0ad1bf3"},
-    {file = "yarl-1.9.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d155a092bf0ebf4a9f6f3b7a650dc5d9a5bbb585ef83a52ed36ba46f55cc39d"},
-    {file = "yarl-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:778df71c8d0c8c9f1b378624b26431ca80041660d7be7c3f724b2c7a6e65d0d6"},
-    {file = "yarl-1.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b9f9cafaf031c34d95c1528c16b2fa07b710e6056b3c4e2e34e9317072da5d1a"},
-    {file = "yarl-1.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ca6b66f69e30f6e180d52f14d91ac854b8119553b524e0e28d5291a724f0f423"},
-    {file = "yarl-1.9.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:e0e7e83f31e23c5d00ff618045ddc5e916f9e613d33c5a5823bc0b0a0feb522f"},
-    {file = "yarl-1.9.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:af52725c7c39b0ee655befbbab5b9a1b209e01bb39128dce0db226a10014aacc"},
-    {file = "yarl-1.9.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:0ab5baaea8450f4a3e241ef17e3d129b2143e38a685036b075976b9c415ea3eb"},
-    {file = "yarl-1.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6d350388ba1129bc867c6af1cd17da2b197dff0d2801036d2d7d83c2d771a682"},
-    {file = "yarl-1.9.3-cp39-cp39-win32.whl", hash = "sha256:e2a16ef5fa2382af83bef4a18c1b3bcb4284c4732906aa69422cf09df9c59f1f"},
-    {file = "yarl-1.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:d92d897cb4b4bf915fbeb5e604c7911021a8456f0964f3b8ebbe7f9188b9eabb"},
-    {file = "yarl-1.9.3-py3-none-any.whl", hash = "sha256:271d63396460b6607b588555ea27a1a02b717ca2e3f2cf53bdde4013d7790929"},
-    {file = "yarl-1.9.3.tar.gz", hash = "sha256:4a14907b597ec55740f63e52d7fee0e9ee09d5b9d57a4f399a7423268e457b57"},
+    {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a8c1df72eb746f4136fe9a2e72b0c9dc1da1cbd23b5372f94b5820ff8ae30e0e"},
+    {file = "yarl-1.9.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a3a6ed1d525bfb91b3fc9b690c5a21bb52de28c018530ad85093cc488bee2dd2"},
+    {file = "yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c38c9ddb6103ceae4e4498f9c08fac9b590c5c71b0370f98714768e22ac6fa66"},
+    {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9e09c9d74f4566e905a0b8fa668c58109f7624db96a2171f21747abc7524234"},
+    {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8477c1ee4bd47c57d49621a062121c3023609f7a13b8a46953eb6c9716ca392"},
+    {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5ff2c858f5f6a42c2a8e751100f237c5e869cbde669a724f2062d4c4ef93551"},
+    {file = "yarl-1.9.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:357495293086c5b6d34ca9616a43d329317feab7917518bc97a08f9e55648455"},
+    {file = "yarl-1.9.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54525ae423d7b7a8ee81ba189f131054defdb122cde31ff17477951464c1691c"},
+    {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:801e9264d19643548651b9db361ce3287176671fb0117f96b5ac0ee1c3530d53"},
+    {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e516dc8baf7b380e6c1c26792610230f37147bb754d6426462ab115a02944385"},
+    {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:7d5aaac37d19b2904bb9dfe12cdb08c8443e7ba7d2852894ad448d4b8f442863"},
+    {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:54beabb809ffcacbd9d28ac57b0db46e42a6e341a030293fb3185c409e626b8b"},
+    {file = "yarl-1.9.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bac8d525a8dbc2a1507ec731d2867025d11ceadcb4dd421423a5d42c56818541"},
+    {file = "yarl-1.9.4-cp310-cp310-win32.whl", hash = "sha256:7855426dfbddac81896b6e533ebefc0af2f132d4a47340cee6d22cac7190022d"},
+    {file = "yarl-1.9.4-cp310-cp310-win_amd64.whl", hash = "sha256:848cd2a1df56ddbffeb375535fb62c9d1645dde33ca4d51341378b3f5954429b"},
+    {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:35a2b9396879ce32754bd457d31a51ff0a9d426fd9e0e3c33394bf4b9036b099"},
+    {file = "yarl-1.9.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c7d56b293cc071e82532f70adcbd8b61909eec973ae9d2d1f9b233f3d943f2c"},
+    {file = "yarl-1.9.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d8a1c6c0be645c745a081c192e747c5de06e944a0d21245f4cf7c05e457c36e0"},
+    {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b3c1ffe10069f655ea2d731808e76e0f452fc6c749bea04781daf18e6039525"},
+    {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:549d19c84c55d11687ddbd47eeb348a89df9cb30e1993f1b128f4685cd0ebbf8"},
+    {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7409f968456111140c1c95301cadf071bd30a81cbd7ab829169fb9e3d72eae9"},
+    {file = "yarl-1.9.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e23a6d84d9d1738dbc6e38167776107e63307dfc8ad108e580548d1f2c587f42"},
+    {file = "yarl-1.9.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d8b889777de69897406c9fb0b76cdf2fd0f31267861ae7501d93003d55f54fbe"},
+    {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:03caa9507d3d3c83bca08650678e25364e1843b484f19986a527630ca376ecce"},
+    {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e9035df8d0880b2f1c7f5031f33f69e071dfe72ee9310cfc76f7b605958ceb9"},
+    {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:c0ec0ed476f77db9fb29bca17f0a8fcc7bc97ad4c6c1d8959c507decb22e8572"},
+    {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:ee04010f26d5102399bd17f8df8bc38dc7ccd7701dc77f4a68c5b8d733406958"},
+    {file = "yarl-1.9.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:49a180c2e0743d5d6e0b4d1a9e5f633c62eca3f8a86ba5dd3c471060e352ca98"},
+    {file = "yarl-1.9.4-cp311-cp311-win32.whl", hash = "sha256:81eb57278deb6098a5b62e88ad8281b2ba09f2f1147c4767522353eaa6260b31"},
+    {file = "yarl-1.9.4-cp311-cp311-win_amd64.whl", hash = "sha256:d1d2532b340b692880261c15aee4dc94dd22ca5d61b9db9a8a361953d36410b1"},
+    {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0d2454f0aef65ea81037759be5ca9947539667eecebca092733b2eb43c965a81"},
+    {file = "yarl-1.9.4-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:44d8ffbb9c06e5a7f529f38f53eda23e50d1ed33c6c869e01481d3fafa6b8142"},
+    {file = "yarl-1.9.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaaea1e536f98754a6e5c56091baa1b6ce2f2700cc4a00b0d49eca8dea471074"},
+    {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3777ce5536d17989c91696db1d459574e9a9bd37660ea7ee4d3344579bb6f129"},
+    {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9fc5fc1eeb029757349ad26bbc5880557389a03fa6ada41703db5e068881e5f2"},
+    {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ea65804b5dc88dacd4a40279af0cdadcfe74b3e5b4c897aa0d81cf86927fee78"},
+    {file = "yarl-1.9.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa102d6d280a5455ad6a0f9e6d769989638718e938a6a0a2ff3f4a7ff8c62cc4"},
+    {file = "yarl-1.9.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09efe4615ada057ba2d30df871d2f668af661e971dfeedf0c159927d48bbeff0"},
+    {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:008d3e808d03ef28542372d01057fd09168419cdc8f848efe2804f894ae03e51"},
+    {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:6f5cb257bc2ec58f437da2b37a8cd48f666db96d47b8a3115c29f316313654ff"},
+    {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:992f18e0ea248ee03b5a6e8b3b4738850ae7dbb172cc41c966462801cbf62cf7"},
+    {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:0e9d124c191d5b881060a9e5060627694c3bdd1fe24c5eecc8d5d7d0eb6faabc"},
+    {file = "yarl-1.9.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:3986b6f41ad22988e53d5778f91855dc0399b043fc8946d4f2e68af22ee9ff10"},
+    {file = "yarl-1.9.4-cp312-cp312-win32.whl", hash = "sha256:4b21516d181cd77ebd06ce160ef8cc2a5e9ad35fb1c5930882baff5ac865eee7"},
+    {file = "yarl-1.9.4-cp312-cp312-win_amd64.whl", hash = "sha256:a9bd00dc3bc395a662900f33f74feb3e757429e545d831eef5bb280252631984"},
+    {file = "yarl-1.9.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:63b20738b5aac74e239622d2fe30df4fca4942a86e31bf47a81a0e94c14df94f"},
+    {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7d7f7de27b8944f1fee2c26a88b4dabc2409d2fea7a9ed3df79b67277644e17"},
+    {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c74018551e31269d56fab81a728f683667e7c28c04e807ba08f8c9e3bba32f14"},
+    {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ca06675212f94e7a610e85ca36948bb8fc023e458dd6c63ef71abfd482481aa5"},
+    {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aef935237d60a51a62b86249839b51345f47564208c6ee615ed2a40878dccdd"},
+    {file = "yarl-1.9.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b134fd795e2322b7684155b7855cc99409d10b2e408056db2b93b51a52accc7"},
+    {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d25039a474c4c72a5ad4b52495056f843a7ff07b632c1b92ea9043a3d9950f6e"},
+    {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f7d6b36dd2e029b6bcb8a13cf19664c7b8e19ab3a58e0fefbb5b8461447ed5ec"},
+    {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:957b4774373cf6f709359e5c8c4a0af9f6d7875db657adb0feaf8d6cb3c3964c"},
+    {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:d7eeb6d22331e2fd42fce928a81c697c9ee2d51400bd1a28803965883e13cead"},
+    {file = "yarl-1.9.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6a962e04b8f91f8c4e5917e518d17958e3bdee71fd1d8b88cdce74dd0ebbf434"},
+    {file = "yarl-1.9.4-cp37-cp37m-win32.whl", hash = "sha256:f3bc6af6e2b8f92eced34ef6a96ffb248e863af20ef4fde9448cc8c9b858b749"},
+    {file = "yarl-1.9.4-cp37-cp37m-win_amd64.whl", hash = "sha256:ad4d7a90a92e528aadf4965d685c17dacff3df282db1121136c382dc0b6014d2"},
+    {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ec61d826d80fc293ed46c9dd26995921e3a82146feacd952ef0757236fc137be"},
+    {file = "yarl-1.9.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8be9e837ea9113676e5754b43b940b50cce76d9ed7d2461df1af39a8ee674d9f"},
+    {file = "yarl-1.9.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bef596fdaa8f26e3d66af846bbe77057237cb6e8efff8cd7cc8dff9a62278bbf"},
+    {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d47552b6e52c3319fede1b60b3de120fe83bde9b7bddad11a69fb0af7db32f1"},
+    {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84fc30f71689d7fc9168b92788abc977dc8cefa806909565fc2951d02f6b7d57"},
+    {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4aa9741085f635934f3a2583e16fcf62ba835719a8b2b28fb2917bb0537c1dfa"},
+    {file = "yarl-1.9.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:206a55215e6d05dbc6c98ce598a59e6fbd0c493e2de4ea6cc2f4934d5a18d130"},
+    {file = "yarl-1.9.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07574b007ee20e5c375a8fe4a0789fad26db905f9813be0f9fef5a68080de559"},
+    {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:5a2e2433eb9344a163aced6a5f6c9222c0786e5a9e9cac2c89f0b28433f56e23"},
+    {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:6ad6d10ed9b67a382b45f29ea028f92d25bc0bc1daf6c5b801b90b5aa70fb9ec"},
+    {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6fe79f998a4052d79e1c30eeb7d6c1c1056ad33300f682465e1b4e9b5a188b78"},
+    {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:a825ec844298c791fd28ed14ed1bffc56a98d15b8c58a20e0e08c1f5f2bea1be"},
+    {file = "yarl-1.9.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8619d6915b3b0b34420cf9b2bb6d81ef59d984cb0fde7544e9ece32b4b3043c3"},
+    {file = "yarl-1.9.4-cp38-cp38-win32.whl", hash = "sha256:686a0c2f85f83463272ddffd4deb5e591c98aac1897d65e92319f729c320eece"},
+    {file = "yarl-1.9.4-cp38-cp38-win_amd64.whl", hash = "sha256:a00862fb23195b6b8322f7d781b0dc1d82cb3bcac346d1e38689370cc1cc398b"},
+    {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:604f31d97fa493083ea21bd9b92c419012531c4e17ea6da0f65cacdcf5d0bd27"},
+    {file = "yarl-1.9.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8a854227cf581330ffa2c4824d96e52ee621dd571078a252c25e3a3b3d94a1b1"},
+    {file = "yarl-1.9.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ba6f52cbc7809cd8d74604cce9c14868306ae4aa0282016b641c661f981a6e91"},
+    {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a6327976c7c2f4ee6816eff196e25385ccc02cb81427952414a64811037bbc8b"},
+    {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8397a3817d7dcdd14bb266283cd1d6fc7264a48c186b986f32e86d86d35fbac5"},
+    {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e0381b4ce23ff92f8170080c97678040fc5b08da85e9e292292aba67fdac6c34"},
+    {file = "yarl-1.9.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:23d32a2594cb5d565d358a92e151315d1b2268bc10f4610d098f96b147370136"},
+    {file = "yarl-1.9.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ddb2a5c08a4eaaba605340fdee8fc08e406c56617566d9643ad8bf6852778fc7"},
+    {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:26a1dc6285e03f3cc9e839a2da83bcbf31dcb0d004c72d0730e755b33466c30e"},
+    {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:18580f672e44ce1238b82f7fb87d727c4a131f3a9d33a5e0e82b793362bf18b4"},
+    {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:29e0f83f37610f173eb7e7b5562dd71467993495e568e708d99e9d1944f561ec"},
+    {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:1f23e4fe1e8794f74b6027d7cf19dc25f8b63af1483d91d595d4a07eca1fb26c"},
+    {file = "yarl-1.9.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:db8e58b9d79200c76956cefd14d5c90af54416ff5353c5bfd7cbe58818e26ef0"},
+    {file = "yarl-1.9.4-cp39-cp39-win32.whl", hash = "sha256:c7224cab95645c7ab53791022ae77a4509472613e839dab722a72abe5a684575"},
+    {file = "yarl-1.9.4-cp39-cp39-win_amd64.whl", hash = "sha256:824d6c50492add5da9374875ce72db7a0733b29c2394890aef23d533106e2b15"},
+    {file = "yarl-1.9.4-py3-none-any.whl", hash = "sha256:928cecb0ef9d5a7946eb6ff58417ad2fe9375762382f1bf5c55e61645f2c43ad"},
+    {file = "yarl-1.9.4.tar.gz", hash = "sha256:566db86717cf8080b99b58b083b773a908ae40f06681e87e589a976faf8246bf"},
 ]
 
 [package.dependencies]
@@ -2989,20 +3618,20 @@ multidict = ">=4.0"
 
 [[package]]
 name = "zipp"
-version = "3.17.0"
+version = "3.18.1"
 description = "Backport of pathlib-compatible object wrapper for zip files"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "zipp-3.17.0-py3-none-any.whl", hash = "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31"},
-    {file = "zipp-3.17.0.tar.gz", hash = "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0"},
+    {file = "zipp-3.18.1-py3-none-any.whl", hash = "sha256:206f5a15f2af3dbaee80769fb7dc6f249695e940acca08dfb2a4769fe61e538b"},
+    {file = "zipp-3.18.1.tar.gz", hash = "sha256:2884ed22e7d8961de1c9a05142eb69a247f120291bc0206a00a7642f09b5b715"},
 ]
 
 [package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
-testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"]
+docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
+testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.13"
-content-hash = "8f84984b4381688a2f9461e968c730626f1da5bdb7a32a5d5f366febc6bdad01"
+content-hash = "70670851d12a378b67fd4b4ed8a4a17d0861637e13a02ddf96a119768e8444e5"
diff --git a/server/pyproject.toml b/server/pyproject.toml
index adc274cb..57f3590e 100644
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation-server"
-version = "1.2.0"
+version = "2.0.0"
 description = "Text Generation Inference Python gRPC Server"
 authors = ["Olivier Dehaene <olivier@huggingface.co>"]
 
@@ -21,10 +21,11 @@ opentelemetry-exporter-otlp = "^1.15.0"
 opentelemetry-instrumentation-grpc = "^0.36b0"
 hf-transfer = "^0.1.2"
 sentencepiece = "^0.1.97"
-peft = "^0.4.0"
-optimum-habana =  "1.10.4"
-transformers = "4.37.2"
+peft = "^0.9.0"
+optimum-habana = "1.11.0"
+transformers = "4.38.2"
 accelerate = "0.27.2"
+outlines= { version = "^0.0.36", optional = true }
 
 [tool.poetry.group.dev.dependencies]
 grpcio-tools = "*"
diff --git a/server/requirements.txt b/server/requirements.txt
index 535ae6bf..4c656ae6 100644
--- a/server/requirements.txt
+++ b/server/requirements.txt
@@ -1,40 +1,42 @@
 accelerate==0.27.2 ; python_version >= "3.9" and python_version < "3.13"
-aiohttp==3.9.0 ; python_version >= "3.9" and python_version < "3.13"
+aiohttp==3.9.5 ; python_version >= "3.9" and python_version < "3.13"
 aiosignal==1.3.1 ; python_version >= "3.9" and python_version < "3.13"
 async-timeout==4.0.3 ; python_version >= "3.9" and python_version < "3.11"
-attrs==23.1.0 ; python_version >= "3.9" and python_version < "3.13"
+attrs==23.2.0 ; python_version >= "3.9" and python_version < "3.13"
 backoff==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
-certifi==2023.11.17 ; python_version >= "3.9" and python_version < "3.13"
+certifi==2024.2.2 ; python_version >= "3.9" and python_version < "3.13"
 charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13"
 click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
 coloredlogs==15.0.1 ; python_version >= "3.9" and python_version < "3.13"
-datasets==2.14.7 ; python_version >= "3.9" and python_version < "3.13"
+datasets==2.19.0 ; python_version >= "3.9" and python_version < "3.13"
 deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
 diffusers==0.26.3 ; python_version >= "3.9" and python_version < "3.13"
-dill==0.3.7 ; python_version >= "3.9" and python_version < "3.13"
-filelock==3.13.1 ; python_version >= "3.9" and python_version < "3.13"
-frozenlist==1.4.0 ; python_version >= "3.9" and python_version < "3.13"
-fsspec==2023.10.0 ; python_version >= "3.9" and python_version < "3.13"
-fsspec[http]==2023.10.0 ; python_version >= "3.9" and python_version < "3.13"
-googleapis-common-protos==1.61.0 ; python_version >= "3.9" and python_version < "3.13"
+dill==0.3.8 ; python_version >= "3.9" and python_version < "3.13"
+filelock==3.13.4 ; python_version >= "3.9" and python_version < "3.13"
+frozenlist==1.4.1 ; python_version >= "3.9" and python_version < "3.13"
+fsspec==2024.3.1 ; python_version >= "3.9" and python_version < "3.13"
+fsspec[http]==2024.3.1 ; python_version >= "3.9" and python_version < "3.13"
+googleapis-common-protos==1.63.0 ; python_version >= "3.9" and python_version < "3.13"
 grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13"
 grpcio-reflection==1.48.2 ; python_version >= "3.9" and python_version < "3.13"
 grpcio-status==1.48.2 ; python_version >= "3.9" and python_version < "3.13"
-grpcio==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
-hf-transfer==0.1.4 ; python_version >= "3.9" and python_version < "3.13"
-huggingface-hub==0.20.3 ; python_version >= "3.9" and python_version < "3.13"
+grpcio==1.62.2 ; python_version >= "3.9" and python_version < "3.13"
+hf-transfer==0.1.6 ; python_version >= "3.9" and python_version < "3.13"
+huggingface-hub==0.22.2 ; python_version >= "3.9" and python_version < "3.13"
 humanfriendly==10.0 ; python_version >= "3.9" and python_version < "3.13"
-idna==3.4 ; python_version >= "3.9" and python_version < "3.13"
-importlib-metadata==7.0.1 ; python_version >= "3.9" and python_version < "3.13"
-jinja2==3.1.2 ; python_version >= "3.9" and python_version < "3.13"
+idna==3.7 ; python_version >= "3.9" and python_version < "3.13"
+importlib-metadata==7.1.0 ; python_version >= "3.9" and python_version < "3.13"
+intel-openmp==2021.4.0 ; python_version >= "3.9" and python_version < "3.13" and platform_system == "Windows"
+jinja2==3.1.3 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
-markupsafe==2.1.3 ; python_version >= "3.9" and python_version < "3.13"
+markupsafe==2.1.5 ; python_version >= "3.9" and python_version < "3.13"
+mkl==2021.4.0 ; python_version >= "3.9" and python_version < "3.13" and platform_system == "Windows"
 mpmath==1.3.0 ; python_version >= "3.9" and python_version < "3.13"
-multidict==6.0.4 ; python_version >= "3.9" and python_version < "3.13"
-multiprocess==0.70.15 ; python_version >= "3.9" and python_version < "3.13"
+multidict==6.0.5 ; python_version >= "3.9" and python_version < "3.13"
+multiprocess==0.70.16 ; python_version >= "3.9" and python_version < "3.13"
 networkx==3.2.1 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.2 ; python_version >= "3.9" and python_version < "3.13"
+numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
@@ -44,37 +46,38 @@ opentelemetry-instrumentation==0.36b0 ; python_version >= "3.9" and python_versi
 opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
-optimum-habana==1.10.4 ; python_version >= "3.9" and python_version < "3.13"
-optimum==1.17.1 ; python_version >= "3.9" and python_version < "3.13"
-packaging==23.2 ; python_version >= "3.9" and python_version < "3.13"
-pandas==2.1.3 ; python_version >= "3.9" and python_version < "3.13"
-peft==0.4.0 ; python_version >= "3.9" and python_version < "3.13"
-pillow==10.1.0 ; python_version >= "3.9" and python_version < "3.13"
+optimum-habana==1.11.0 ; python_version >= "3.9" and python_version < "3.13"
+optimum==1.19.1 ; python_version >= "3.9" and python_version < "3.13"
+packaging==24.0 ; python_version >= "3.9" and python_version < "3.13"
+pandas==2.2.2 ; python_version >= "3.9" and python_version < "3.13"
+peft==0.9.0 ; python_version >= "3.9" and python_version < "3.13"
+pillow==10.3.0 ; python_version >= "3.9" and python_version < "3.13"
 protobuf==3.20.3 ; python_version >= "3.9" and python_version < "3.13"
-psutil==5.9.6 ; python_version >= "3.9" and python_version < "3.13"
+psutil==5.9.8 ; python_version >= "3.9" and python_version < "3.13"
 pyarrow-hotfix==0.6 ; python_version >= "3.9" and python_version < "3.13"
-pyarrow==14.0.1 ; python_version >= "3.9" and python_version < "3.13"
+pyarrow==16.0.0 ; python_version >= "3.9" and python_version < "3.13"
 pyreadline3==3.4.1 ; sys_platform == "win32" and python_version >= "3.9" and python_version < "3.13"
-python-dateutil==2.8.2 ; python_version >= "3.9" and python_version < "3.13"
-pytz==2023.3.post1 ; python_version >= "3.9" and python_version < "3.13"
+python-dateutil==2.9.0.post0 ; python_version >= "3.9" and python_version < "3.13"
+pytz==2024.1 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
-regex==2023.10.3 ; python_version >= "3.9" and python_version < "3.13"
+regex==2024.4.16 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
-safetensors==0.4.2 ; python_version >= "3.9" and python_version < "3.13"
+safetensors==0.4.3 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
-setuptools==69.0.2 ; python_version >= "3.9" and python_version < "3.13"
+setuptools==69.5.1 ; python_version >= "3.9" and python_version < "3.13"
 six==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
 sympy==1.12 ; python_version >= "3.9" and python_version < "3.13"
+tbb==2021.12.0 ; python_version >= "3.9" and python_version < "3.13" and platform_system == "Windows"
 tokenizers==0.15.2 ; python_version >= "3.9" and python_version < "3.13"
-tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
-transformers==4.37.2 ; python_version >= "3.9" and python_version < "3.13"
-transformers[sentencepiece]==4.37.2 ; python_version >= "3.9" and python_version < "3.13"
+tqdm==4.66.2 ; python_version >= "3.9" and python_version < "3.13"
+transformers==4.38.2 ; python_version >= "3.9" and python_version < "3.13"
+transformers[sentencepiece]==4.38.2 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.8.0 ; python_version >= "3.9" and python_version < "3.13"
-tzdata==2023.3 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.1.0 ; python_version >= "3.9" and python_version < "3.13"
+typing-extensions==4.11.0 ; python_version >= "3.9" and python_version < "3.13"
+tzdata==2024.1 ; python_version >= "3.9" and python_version < "3.13"
+urllib3==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
 win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
 wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
 xxhash==3.4.1 ; python_version >= "3.9" and python_version < "3.13"
-yarl==1.9.3 ; python_version >= "3.9" and python_version < "3.13"
-zipp==3.17.0 ; python_version >= "3.9" and python_version < "3.13"
+yarl==1.9.4 ; python_version >= "3.9" and python_version < "3.13"
+zipp==3.18.1 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/requirements_common.txt b/server/requirements_common.txt
deleted file mode 100644
index 5a321834..00000000
--- a/server/requirements_common.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-backoff==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
-certifi==2023.11.17 ; python_version >= "3.9" and python_version < "3.13"
-charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13"
-click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
-colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
-deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
-einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-filelock==3.13.1 ; python_version >= "3.9" and python_version < "3.13"
-fsspec==2023.10.0 ; python_version >= "3.9" and python_version < "3.13"
-googleapis-common-protos==1.61.0 ; python_version >= "3.9" and python_version < "3.13"
-grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-reflection==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-status==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
-hf-transfer==0.1.4 ; python_version >= "3.9" and python_version < "3.13"
-huggingface-hub==0.16.4 ; python_version >= "3.9" and python_version < "3.13"
-idna==3.4 ; python_version >= "3.9" and python_version < "3.13"
-loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.2 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-exporter-otlp==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-instrumentation-grpc==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-instrumentation==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
-opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
-packaging==23.2 ; python_version >= "3.9" and python_version < "3.13"
-pillow==10.1.0 ; python_version >= "3.9" and python_version < "3.13"
-protobuf==4.25.1 ; python_version >= "3.9" and python_version < "3.13"
-pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
-regex==2023.10.3 ; python_version >= "3.9" and python_version < "3.13"
-requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
-safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
-scipy==1.11.4 ; python_version >= "3.9" and python_version < "3.13"
-sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
-setuptools==69.0.2 ; python_version >= "3.9" and python_version < "3.13"
-tokenizers==0.13.3 ; python_version >= "3.9" and python_version < "3.13"
-tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
-transformers==4.33.3 ; python_version >= "3.9" and python_version < "3.13"
-typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.8.0 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.1.0 ; python_version >= "3.9" and python_version < "3.13"
-win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
-wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/requirements_cuda.txt b/server/requirements_cuda.txt
index bc1b8891..6ad5235a 100644
--- a/server/requirements_cuda.txt
+++ b/server/requirements_cuda.txt
@@ -1,23 +1,22 @@
 backoff==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
-bitsandbytes==0.41.2.post2 ; python_version >= "3.9" and python_version < "3.13"
-certifi==2023.11.17 ; python_version >= "3.9" and python_version < "3.13"
+certifi==2024.2.2 ; python_version >= "3.9" and python_version < "3.13"
 charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13"
 click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
 deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
 einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-filelock==3.13.1 ; python_version >= "3.9" and python_version < "3.13"
-fsspec==2023.10.0 ; python_version >= "3.9" and python_version < "3.13"
-googleapis-common-protos==1.61.0 ; python_version >= "3.9" and python_version < "3.13"
+filelock==3.13.3 ; python_version >= "3.9" and python_version < "3.13"
+fsspec==2024.2.0 ; python_version >= "3.9" and python_version < "3.13"
+googleapis-common-protos==1.63.0 ; python_version >= "3.9" and python_version < "3.13"
 grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-reflection==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-status==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
-hf-transfer==0.1.4 ; python_version >= "3.9" and python_version < "3.13"
-huggingface-hub==0.16.4 ; python_version >= "3.9" and python_version < "3.13"
-idna==3.4 ; python_version >= "3.9" and python_version < "3.13"
+grpcio-reflection==1.62.1 ; python_version >= "3.9" and python_version < "3.13"
+grpcio-status==1.62.1 ; python_version >= "3.9" and python_version < "3.13"
+grpcio==1.62.1 ; python_version >= "3.9" and python_version < "3.13"
+hf-transfer==0.1.6 ; python_version >= "3.9" and python_version < "3.13"
+huggingface-hub==0.19.4 ; python_version >= "3.9" and python_version < "3.13"
+idna==3.6 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.2 ; python_version >= "3.9" and python_version < "3.13"
+numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
@@ -27,21 +26,21 @@ opentelemetry-instrumentation==0.36b0 ; python_version >= "3.9" and python_versi
 opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
-packaging==23.2 ; python_version >= "3.9" and python_version < "3.13"
-pillow==10.1.0 ; python_version >= "3.9" and python_version < "3.13"
-protobuf==4.25.1 ; python_version >= "3.9" and python_version < "3.13"
+packaging==24.0 ; python_version >= "3.9" and python_version < "3.13"
+pillow==10.3.0 ; python_version >= "3.9" and python_version < "3.13"
+protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
-regex==2023.10.3 ; python_version >= "3.9" and python_version < "3.13"
+regex==2023.12.25 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
-safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
-scipy==1.11.4 ; python_version >= "3.9" and python_version < "3.13"
+safetensors==0.4.2 ; python_version >= "3.9" and python_version < "3.13"
+scipy==1.13.0 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
-setuptools==69.0.2 ; python_version >= "3.9" and python_version < "3.13"
-tokenizers==0.13.3 ; python_version >= "3.9" and python_version < "3.13"
-tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
-transformers==4.33.3 ; python_version >= "3.9" and python_version < "3.13"
+setuptools==69.2.0 ; python_version >= "3.9" and python_version < "3.13"
+tokenizers==0.15.2 ; python_version >= "3.9" and python_version < "3.13"
+tqdm==4.66.2 ; python_version >= "3.9" and python_version < "3.13"
+transformers==4.39.3 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.8.0 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.1.0 ; python_version >= "3.9" and python_version < "3.13"
+typing-extensions==4.11.0 ; python_version >= "3.9" and python_version < "3.13"
+urllib3==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
 win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
 wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/requirements_rocm.txt b/server/requirements_rocm.txt
index 5a321834..6ad5235a 100644
--- a/server/requirements_rocm.txt
+++ b/server/requirements_rocm.txt
@@ -1,22 +1,22 @@
 backoff==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
-certifi==2023.11.17 ; python_version >= "3.9" and python_version < "3.13"
+certifi==2024.2.2 ; python_version >= "3.9" and python_version < "3.13"
 charset-normalizer==3.3.2 ; python_version >= "3.9" and python_version < "3.13"
 click==8.1.7 ; python_version >= "3.9" and python_version < "3.13"
 colorama==0.4.6 ; python_version >= "3.9" and python_version < "3.13" and (sys_platform == "win32" or platform_system == "Windows")
 deprecated==1.2.14 ; python_version >= "3.9" and python_version < "3.13"
 einops==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-filelock==3.13.1 ; python_version >= "3.9" and python_version < "3.13"
-fsspec==2023.10.0 ; python_version >= "3.9" and python_version < "3.13"
-googleapis-common-protos==1.61.0 ; python_version >= "3.9" and python_version < "3.13"
+filelock==3.13.3 ; python_version >= "3.9" and python_version < "3.13"
+fsspec==2024.2.0 ; python_version >= "3.9" and python_version < "3.13"
+googleapis-common-protos==1.63.0 ; python_version >= "3.9" and python_version < "3.13"
 grpc-interceptor==0.15.4 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-reflection==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio-status==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
-grpcio==1.59.3 ; python_version >= "3.9" and python_version < "3.13"
-hf-transfer==0.1.4 ; python_version >= "3.9" and python_version < "3.13"
-huggingface-hub==0.16.4 ; python_version >= "3.9" and python_version < "3.13"
-idna==3.4 ; python_version >= "3.9" and python_version < "3.13"
+grpcio-reflection==1.62.1 ; python_version >= "3.9" and python_version < "3.13"
+grpcio-status==1.62.1 ; python_version >= "3.9" and python_version < "3.13"
+grpcio==1.62.1 ; python_version >= "3.9" and python_version < "3.13"
+hf-transfer==0.1.6 ; python_version >= "3.9" and python_version < "3.13"
+huggingface-hub==0.19.4 ; python_version >= "3.9" and python_version < "3.13"
+idna==3.6 ; python_version >= "3.9" and python_version < "3.13"
 loguru==0.6.0 ; python_version >= "3.9" and python_version < "3.13"
-numpy==1.26.2 ; python_version >= "3.9" and python_version < "3.13"
+numpy==1.26.4 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-api==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-grpc==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-exporter-otlp-proto-http==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
@@ -26,21 +26,21 @@ opentelemetry-instrumentation==0.36b0 ; python_version >= "3.9" and python_versi
 opentelemetry-proto==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-sdk==1.15.0 ; python_version >= "3.9" and python_version < "3.13"
 opentelemetry-semantic-conventions==0.36b0 ; python_version >= "3.9" and python_version < "3.13"
-packaging==23.2 ; python_version >= "3.9" and python_version < "3.13"
-pillow==10.1.0 ; python_version >= "3.9" and python_version < "3.13"
-protobuf==4.25.1 ; python_version >= "3.9" and python_version < "3.13"
+packaging==24.0 ; python_version >= "3.9" and python_version < "3.13"
+pillow==10.3.0 ; python_version >= "3.9" and python_version < "3.13"
+protobuf==4.25.3 ; python_version >= "3.9" and python_version < "3.13"
 pyyaml==6.0.1 ; python_version >= "3.9" and python_version < "3.13"
-regex==2023.10.3 ; python_version >= "3.9" and python_version < "3.13"
+regex==2023.12.25 ; python_version >= "3.9" and python_version < "3.13"
 requests==2.31.0 ; python_version >= "3.9" and python_version < "3.13"
-safetensors==0.3.3 ; python_version >= "3.9" and python_version < "3.13"
-scipy==1.11.4 ; python_version >= "3.9" and python_version < "3.13"
+safetensors==0.4.2 ; python_version >= "3.9" and python_version < "3.13"
+scipy==1.13.0 ; python_version >= "3.9" and python_version < "3.13"
 sentencepiece==0.1.99 ; python_version >= "3.9" and python_version < "3.13"
-setuptools==69.0.2 ; python_version >= "3.9" and python_version < "3.13"
-tokenizers==0.13.3 ; python_version >= "3.9" and python_version < "3.13"
-tqdm==4.66.1 ; python_version >= "3.9" and python_version < "3.13"
-transformers==4.33.3 ; python_version >= "3.9" and python_version < "3.13"
+setuptools==69.2.0 ; python_version >= "3.9" and python_version < "3.13"
+tokenizers==0.15.2 ; python_version >= "3.9" and python_version < "3.13"
+tqdm==4.66.2 ; python_version >= "3.9" and python_version < "3.13"
+transformers==4.39.3 ; python_version >= "3.9" and python_version < "3.13"
 typer==0.6.1 ; python_version >= "3.9" and python_version < "3.13"
-typing-extensions==4.8.0 ; python_version >= "3.9" and python_version < "3.13"
-urllib3==2.1.0 ; python_version >= "3.9" and python_version < "3.13"
+typing-extensions==4.11.0 ; python_version >= "3.9" and python_version < "3.13"
+urllib3==2.2.1 ; python_version >= "3.9" and python_version < "3.13"
 win32-setctime==1.1.0 ; python_version >= "3.9" and python_version < "3.13" and sys_platform == "win32"
 wrapt==1.16.0 ; python_version >= "3.9" and python_version < "3.13"
diff --git a/server/tests/models/test_bloom.py b/server/tests/models/test_bloom.py
index 1f70d000..4b7dde81 100644
--- a/server/tests/models/test_bloom.py
+++ b/server/tests/models/test_bloom.py
@@ -105,7 +105,7 @@ def test_causal_lm_batch_type(default_bloom):
 @pytest.mark.skip
 def test_causal_lm_generate_token(default_bloom, default_bloom_batch):
     sequence_length = len(default_bloom_batch.all_input_ids[0])
-    generations, next_batch = default_bloom.generate_token(default_bloom_batch)
+    generations, next_batch, _ = default_bloom.generate_token(default_bloom_batch)
 
     assert len(generations) == len(default_bloom_batch)
     assert isinstance(next_batch, CausalLMBatch)
@@ -135,8 +135,20 @@ def test_causal_lm_generate_token(default_bloom, default_bloom_batch):
     )
     assert all([generation.generated_text is None for generation in generations])
     assert all([len(generation.prefill_tokens) == 1 for generation in generations])
-    assert all([generation.token_id.item() == 10264 for generation in generations])
-    assert all([generation.token_text == "Test" for generation in generations])
+    assert all(
+        [
+            token_id.item() == 10264
+            for generation in generations
+            for token_id in generation.tokens.token_ids
+        ]
+    )
+    assert all(
+        [
+            token_text == "Test"
+            for generation in generations
+            for token_text in generation.tokens.texts
+        ]
+    )
     assert generations[0].request_id == 0
 
 
@@ -144,10 +156,10 @@ def test_causal_lm_generate_token(default_bloom, default_bloom_batch):
 def test_causal_lm_generate_token_completion(default_bloom, default_bloom_batch):
     next_batch = default_bloom_batch
     for _ in range(default_bloom_batch.stopping_criterias[0].max_new_tokens - 1):
-        generations, next_batch = default_bloom.generate_token(next_batch)
+        generations, next_batch, _ = default_bloom.generate_token(next_batch)
         assert len(generations) == len(default_bloom_batch)
 
-    generations, next_batch = default_bloom.generate_token(next_batch)
+    generations, next_batch, _ = default_bloom.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
@@ -170,10 +182,10 @@ def test_causal_lm_generate_token_completion_multi(
     for i in range(
         default_multi_requests_bloom_batch.stopping_criterias[1].max_new_tokens - 1
     ):
-        generations, next_batch = default_bloom.generate_token(next_batch)
+        generations, next_batch, _ = default_bloom.generate_token(next_batch)
         assert len(generations) == len(default_multi_requests_bloom_batch)
 
-    generations, next_batch = default_bloom.generate_token(next_batch)
+    generations, next_batch, _ = default_bloom.generate_token(next_batch)
     assert next_batch is not None
 
     assert len(generations) == 2
@@ -193,10 +205,10 @@ def test_causal_lm_generate_token_completion_multi(
     for _ in range(
         stopping_criterias[0].max_new_tokens - stopping_criterias[1].max_new_tokens - 1
     ):
-        generations, next_batch = default_bloom.generate_token(next_batch)
+        generations, next_batch, _ = default_bloom.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_bloom.generate_token(next_batch)
+    generations, next_batch, _ = default_bloom.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
@@ -217,11 +229,11 @@ def test_batch_concatenate(
     default_bloom, default_bloom_batch, default_multi_requests_bloom_batch
 ):
     next_batch_0 = default_bloom_batch
-    _, next_batch_0 = default_bloom.generate_token(next_batch_0)
-    _, next_batch_0 = default_bloom.generate_token(next_batch_0)
+    _, next_batch_0, _ = default_bloom.generate_token(next_batch_0)
+    _, next_batch_0, _ = default_bloom.generate_token(next_batch_0)
 
     next_batch_1 = default_multi_requests_bloom_batch
-    _, next_batch_1 = default_bloom.generate_token(next_batch_1)
+    _, next_batch_1, _ = default_bloom.generate_token(next_batch_1)
 
     # Clone past_key_values before concatenating to compare after,
     # because they are removed from the concatenated batches
@@ -281,10 +293,10 @@ def test_batch_concatenate(
     for _ in range(
         default_multi_requests_bloom_batch.stopping_criterias[1].max_new_tokens - 2
     ):
-        generations, next_batch = default_bloom.generate_token(next_batch)
+        generations, next_batch, _ = default_bloom.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_bloom.generate_token(next_batch)
+    generations, next_batch, _ = default_bloom.generate_token(next_batch)
     assert next_batch is not None
 
     assert len(generations) == 3
@@ -306,10 +318,10 @@ def test_batch_concatenate(
         - default_multi_requests_bloom_batch.stopping_criterias[1].max_new_tokens
         - 2
     ):
-        generations, next_batch = default_bloom.generate_token(next_batch)
+        generations, next_batch, _ = default_bloom.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_bloom.generate_token(next_batch)
+    generations, next_batch, _ = default_bloom.generate_token(next_batch)
     assert next_batch is not None
 
     assert len(generations) == 2
@@ -330,10 +342,10 @@ def test_batch_concatenate(
         - default_multi_requests_bloom_batch.stopping_criterias[1].max_new_tokens
         - 4
     ):
-        generations, next_batch = default_bloom.generate_token(next_batch)
+        generations, next_batch, _ = default_bloom.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_bloom.generate_token(next_batch)
+    generations, next_batch, _ = default_bloom.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
diff --git a/server/tests/models/test_causal_lm.py b/server/tests/models/test_causal_lm.py
index e467d291..00c312a8 100644
--- a/server/tests/models/test_causal_lm.py
+++ b/server/tests/models/test_causal_lm.py
@@ -111,7 +111,9 @@ def test_causal_lm_batch_type(default_causal_lm):
 
 def test_causal_lm_generate_token(default_causal_lm, default_causal_lm_batch):
     sequence_length = len(default_causal_lm_batch.all_input_ids[0])
-    generations, next_batch = default_causal_lm.generate_token(default_causal_lm_batch)
+    generations, next_batch, _ = default_causal_lm.generate_token(
+        default_causal_lm_batch
+    )
 
     assert len(generations) == len(next_batch)
     assert isinstance(next_batch, CausalLMBatch)
@@ -141,8 +143,20 @@ def test_causal_lm_generate_token(default_causal_lm, default_causal_lm_batch):
     )
     assert all([generation.generated_text is None for generation in generations])
     assert all([len(generation.prefill_tokens) == 1 for generation in generations])
-    assert all([generation.token_id.item() == 13 for generation in generations])
-    assert all([generation.token_text == "." for generation in generations])
+    assert all(
+        [
+            token_id.item() == 13
+            for generation in generations
+            for token_id in generation.tokens.token_ids
+        ]
+    )
+    assert all(
+        [
+            token_text == "."
+            for generation in generations
+            for token_text in generation.tokens.texts
+        ]
+    )
     assert generations[0].request_id == 0
 
 
@@ -151,10 +165,10 @@ def test_causal_lm_generate_token_completion(
 ):
     next_batch = default_causal_lm_batch
     for _ in range(default_causal_lm_batch.stopping_criterias[0].max_new_tokens - 1):
-        generations, next_batch = default_causal_lm.generate_token(next_batch)
+        generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_causal_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
@@ -174,10 +188,10 @@ def test_causal_lm_generate_token_completion_multi(
     for i in range(
         default_multi_requests_causal_lm_batch.stopping_criterias[1].max_new_tokens - 1
     ):
-        generations, next_batch = default_causal_lm.generate_token(next_batch)
+        generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_causal_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
     assert next_batch is not None
 
     assert len(generations) == 2
@@ -200,10 +214,10 @@ def test_causal_lm_generate_token_completion_multi(
     for _ in range(
         stopping_criterias[0].max_new_tokens - stopping_criterias[1].max_new_tokens - 1
     ):
-        generations, next_batch = default_causal_lm.generate_token(next_batch)
+        generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_causal_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
@@ -222,11 +236,11 @@ def test_batch_concatenate(
     default_causal_lm, default_causal_lm_batch, default_multi_requests_causal_lm_batch
 ):
     next_batch_0 = default_causal_lm_batch
-    _, next_batch_0 = default_causal_lm.generate_token(next_batch_0)
-    _, next_batch_0 = default_causal_lm.generate_token(next_batch_0)
+    _, next_batch_0, _ = default_causal_lm.generate_token(next_batch_0)
+    _, next_batch_0, _ = default_causal_lm.generate_token(next_batch_0)
 
     next_batch_1 = default_multi_requests_causal_lm_batch
-    _, next_batch_1 = default_causal_lm.generate_token(next_batch_1)
+    _, next_batch_1, _ = default_causal_lm.generate_token(next_batch_1)
 
     # Clone past_key_values before concatenating to compare after,
     # because they are removed from the concatenated batches
@@ -285,10 +299,10 @@ def test_batch_concatenate(
     for _ in range(
         default_multi_requests_causal_lm_batch.stopping_criterias[1].max_new_tokens - 2
     ):
-        generations, next_batch = default_causal_lm.generate_token(next_batch)
+        generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_causal_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
     assert next_batch is not None
 
     assert len(generations) == 3
@@ -311,10 +325,10 @@ def test_batch_concatenate(
         - default_multi_requests_causal_lm_batch.stopping_criterias[1].max_new_tokens
         - 2
     ):
-        generations, next_batch = default_causal_lm.generate_token(next_batch)
+        generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_causal_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
     assert next_batch is not None
 
     assert len(generations) == 2
@@ -333,10 +347,10 @@ def test_batch_concatenate(
         - default_multi_requests_causal_lm_batch.stopping_criterias[1].max_new_tokens
         - 4
     ):
-        generations, next_batch = default_causal_lm.generate_token(next_batch)
+        generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_causal_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_causal_lm.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
diff --git a/server/tests/models/test_santacoder.py b/server/tests/models/test_santacoder.py
index fceec560..1e40e766 100644
--- a/server/tests/models/test_santacoder.py
+++ b/server/tests/models/test_santacoder.py
@@ -55,10 +55,10 @@ def test_santacoder_generate_token_completion(default_santacoder, default_pb_bat
     next_batch = batch
 
     for _ in range(batch.stopping_criterias[0].max_new_tokens - 1):
-        generations, next_batch = default_santacoder.generate_token(next_batch)
+        generations, next_batch, _ = default_santacoder.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_santacoder.generate_token(next_batch)
+    generations, next_batch, _ = default_santacoder.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
@@ -83,10 +83,10 @@ def test_fim_santacoder_generate_token_completion(
     next_batch = batch
 
     for _ in range(batch.stopping_criterias[0].max_new_tokens - 1):
-        generations, next_batch = default_santacoder.generate_token(next_batch)
+        generations, next_batch, _ = default_santacoder.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_santacoder.generate_token(next_batch)
+    generations, next_batch, _ = default_santacoder.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
diff --git a/server/tests/models/test_seq2seq_lm.py b/server/tests/models/test_seq2seq_lm.py
index 2b59f731..ba9f5578 100644
--- a/server/tests/models/test_seq2seq_lm.py
+++ b/server/tests/models/test_seq2seq_lm.py
@@ -107,7 +107,7 @@ def test_seq2seq_lm_batch_type(default_seq2seq_lm):
 @pytest.mark.skip("seq2seq model not enabled on HPU yet")
 def test_seq2seq_lm_generate_token(default_seq2seq_lm, default_seq2seq_lm_batch):
     sequence_length = len(default_seq2seq_lm_batch.input_ids[0])
-    generations, next_batch = default_seq2seq_lm.generate_token(
+    generations, next_batch, _ = default_seq2seq_lm.generate_token(
         default_seq2seq_lm_batch
     )
 
@@ -155,8 +155,20 @@ def test_seq2seq_lm_generate_token(default_seq2seq_lm, default_seq2seq_lm_batch)
     )
     assert all([generation.generated_text is None for generation in generations])
     assert all([len(generation.prefill_tokens) == 1 for generation in generations])
-    assert all([generation.token_id.item() == 259 for generation in generations])
-    assert all([generation.token_text == " " for generation in generations])
+    assert all(
+        [
+            token_id.item() == 259
+            for generation in generations
+            for token_id in generation.tokens.token_ids
+        ]
+    )
+    assert all(
+        [
+            token_text == " "
+            for generation in generations
+            for token_text in generation.tokens.texts
+        ]
+    )
     assert generations[0].request_id == 0
 
 
@@ -166,10 +178,10 @@ def test_seq2seq_lm_generate_token_completion(
 ):
     next_batch = default_seq2seq_lm_batch
     for _ in range(6):
-        generations, next_batch = default_seq2seq_lm.generate_token(next_batch)
+        generations, next_batch, _ = default_seq2seq_lm.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_seq2seq_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_seq2seq_lm.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
@@ -185,10 +197,10 @@ def test_seq2seq_lm_generate_token_completion_multi(
     next_batch = default_multi_requests_seq2seq_lm_batch
 
     for i in range(4):
-        generations, next_batch = default_seq2seq_lm.generate_token(next_batch)
+        generations, next_batch, _ = default_seq2seq_lm.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_seq2seq_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_seq2seq_lm.generate_token(next_batch)
     assert next_batch is not None
 
     assert len(generations) == 2
@@ -201,10 +213,10 @@ def test_seq2seq_lm_generate_token_completion_multi(
 
     next_batch = next_batch.filter([next_batch.requests[0].id])
 
-    generations, next_batch = default_seq2seq_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_seq2seq_lm.generate_token(next_batch)
     assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_seq2seq_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_seq2seq_lm.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
@@ -223,11 +235,11 @@ def test_batch_concatenate(
     default_multi_requests_seq2seq_lm_batch,
 ):
     next_batch_0 = default_seq2seq_lm_batch
-    _, next_batch_0 = default_seq2seq_lm.generate_token(next_batch_0)
-    _, next_batch_0 = default_seq2seq_lm.generate_token(next_batch_0)
+    _, next_batch_0, _ = default_seq2seq_lm.generate_token(next_batch_0)
+    _, next_batch_0, _ = default_seq2seq_lm.generate_token(next_batch_0)
 
     next_batch_1 = default_multi_requests_seq2seq_lm_batch
-    _, next_batch_1 = default_seq2seq_lm.generate_token(next_batch_1)
+    _, next_batch_1, _ = default_seq2seq_lm.generate_token(next_batch_1)
 
     # Copy hidden state because it is removed from the concatenated branches
     next_batch_0_encoder_last_hidden_state = next_batch_0.encoder_last_hidden_state
@@ -319,10 +331,10 @@ def test_batch_concatenate(
         )
 
     for _ in range(3):
-        generations, next_batch = default_seq2seq_lm.generate_token(next_batch)
+        generations, next_batch, _ = default_seq2seq_lm.generate_token(next_batch)
         assert len(generations) == len(next_batch)
 
-    generations, next_batch = default_seq2seq_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_seq2seq_lm.generate_token(next_batch)
     assert next_batch is not None
 
     assert len(generations) == 3
@@ -337,7 +349,7 @@ def test_batch_concatenate(
         [next_batch.requests[0].id, next_batch.requests[1].id]
     )
 
-    generations, next_batch = default_seq2seq_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_seq2seq_lm.generate_token(next_batch)
     assert next_batch is not None
 
     assert len(generations) == 2
@@ -347,7 +359,7 @@ def test_batch_concatenate(
 
     next_batch = next_batch.filter([next_batch.requests[1].id])
 
-    generations, next_batch = default_seq2seq_lm.generate_token(next_batch)
+    generations, next_batch, _ = default_seq2seq_lm.generate_token(next_batch)
     assert next_batch is None
 
     assert len(generations) == 1
diff --git a/server/tests/utils/test_hub.py b/server/tests/utils/test_hub.py
index fac9a64d..721820f5 100644
--- a/server/tests/utils/test_hub.py
+++ b/server/tests/utils/test_hub.py
@@ -1,5 +1,13 @@
+import os
+import requests
+import tempfile
+
 import pytest
 
+import huggingface_hub.constants
+from huggingface_hub import hf_api
+
+import text_generation_server.utils.hub
 from text_generation_server.utils.hub import (
     weight_hub_files,
     download_weights,
@@ -10,6 +18,60 @@ from text_generation_server.utils.hub import (
 )
 
 
+@pytest.fixture()
+def offline():  # forces text_generation_server.utils.hub.HF_HUB_OFFLINE to True for one test
+    current_value = text_generation_server.utils.hub.HF_HUB_OFFLINE
+    text_generation_server.utils.hub.HF_HUB_OFFLINE = True
+    yield "offline"
+    text_generation_server.utils.hub.HF_HUB_OFFLINE = current_value  # teardown: put the saved flag back
+
+
+@pytest.fixture()
+def fresh_cache():  # points every hub-cache setting at a new temporary directory for one test
+    with tempfile.TemporaryDirectory() as d:
+        current_value = huggingface_hub.constants.HUGGINGFACE_HUB_CACHE
+        huggingface_hub.constants.HUGGINGFACE_HUB_CACHE = d
+        text_generation_server.utils.hub.HUGGINGFACE_HUB_CACHE = d
+        os.environ["HUGGINGFACE_HUB_CACHE"] = d
+        yield
+        huggingface_hub.constants.HUGGINGFACE_HUB_CACHE = current_value
+        os.environ["HUGGINGFACE_HUB_CACHE"] = current_value  # NOTE(review): sets the env var to the constant's old value, not the env var's own previous state
+        text_generation_server.utils.hub.HUGGINGFACE_HUB_CACHE = current_value
+
+
+@pytest.fixture()
+def prefetched():  # snapshot-downloads the model's *.safetensors files, then yields the model id
+    model_id = "bert-base-uncased"
+    huggingface_hub.snapshot_download(
+        repo_id=model_id,
+        revision="main",
+        local_files_only=False,  # not restricted to files already on disk
+        repo_type="model",
+        allow_patterns=["*.safetensors"],
+    )
+    yield model_id
+
+
+def test_weight_hub_files_offline_error(offline, fresh_cache):
+    # If the model is not prefetched then it will raise an error
+    with pytest.raises(EntryNotFoundError):  # NOTE(review): EntryNotFoundError's import is not visible in this hunk - confirm it is imported
+        weight_hub_files("gpt2")
+
+
+def test_weight_hub_files_offline_ok(prefetched, offline):  # NOTE(review): presumably needs `prefetched` set up before `offline` - confirm fixture order
+    # If the model is prefetched then we should be able to get the weight files from local cache
+    filenames = weight_hub_files(prefetched)
+    root = None
+    assert len(filenames) == 1
+    for f in filenames:
+        curroot, filename = os.path.split(f)
+        if root is None:
+            root = curroot  # first file fixes the directory every other file must share
+        else:
+            assert root == curroot
+        assert filename == "model.safetensors"
+
+
 def test_weight_hub_files():
     filenames = weight_hub_files("bigscience/bloom-560m")
     assert filenames == ["model.safetensors"]
@@ -33,8 +95,11 @@ def test_download_weights():
     assert files == local_files
 
 
-def test_weight_files_error():
+def test_weight_files_revision_error():
     with pytest.raises(RevisionNotFoundError):
         weight_files("bigscience/bloom-560m", revision="error")
+
+
+def test_weight_files_not_cached_error(fresh_cache):
     with pytest.raises(LocalEntryNotFoundError):
         weight_files("bert-base-uncased")
diff --git a/server/tests/utils/test_layers.py b/server/tests/utils/test_layers.py
new file mode 100644
index 00000000..93a0e982
--- /dev/null
+++ b/server/tests/utils/test_layers.py
@@ -0,0 +1,77 @@
+import torch
+from text_generation_server.utils.layers import (
+    TensorParallelEmbedding,
+)
+
+
+class ProcessGroup:  # minimal test stub exposing only size() and rank()
+    def __init__(self, rank: int, world_size: int):
+        self._rank = rank
+        self.world_size = world_size
+
+    def size(self) -> int:
+        return self.world_size  # number of ranks given at construction
+
+    def rank(self) -> int:
+        return self._rank  # this stub's own rank index
+
+
+class Weights:  # in-memory stand-in for the weights object passed to TensorParallelEmbedding in the test below
+    def __init__(self, rank: int, world_size: int, vocab_size: int, hidden_dim: int):
+        self.weight = (
+            torch.arange(vocab_size * hidden_dim).float().view(vocab_size, hidden_dim)
+        )
+        self.process_group = ProcessGroup(rank, world_size)
+
+    def get_partial_sharded(self, name: str, dim: int):
+        assert dim == 0  # this stub only shards along the first axis
+
+        rank = self.process_group.rank()
+        world_size = self.process_group.size()
+        size = self.weight.shape[dim]
+
+        block_size = (size + world_size - 1) // world_size  # ceiling division
+        start = rank * block_size
+        stop = (rank + 1) * block_size
+        return self.weight[start:stop]  # slicing clamps at the end, so the last rank may get fewer rows
+
+    def get_shape(self, name: str):
+        return self.weight.shape  # `name` is unused; the stub holds a single tensor
+
+
+def test_weight_hub_files_offline_error():  # NOTE(review): name duplicates a test in test_hub.py; this one checks TensorParallelEmbedding sharding
+
+    vocab_size = 17
+    weights = Weights(rank=0, world_size=1, vocab_size=vocab_size, hidden_dim=256)
+    embeddings = TensorParallelEmbedding("", weights)
+
+    input_ids = torch.arange(vocab_size)
+    output = embeddings.forward(input_ids)
+    assert embeddings.min_id == 0
+    assert embeddings.max_id == 17
+    torch.testing.assert_close(output, torch.arange(256 * 17).float().view(17, 256))
+
+    weights_0_2 = Weights(rank=0, world_size=2, vocab_size=vocab_size, hidden_dim=256)
+    weights_1_2 = Weights(rank=1, world_size=2, vocab_size=vocab_size, hidden_dim=256)
+    embeddings_0_2 = TensorParallelEmbedding("", weights_0_2, reduce=False)
+    assert embeddings_0_2.min_id == 0
+    assert embeddings_0_2.max_id == 9  # ceil(17 / 2) rows belong to rank 0
+    torch.testing.assert_close(
+        embeddings_0_2.weight,
+        torch.cat([torch.arange(9 * 256), torch.zeros(256)], dim=0)  # 9 shard rows plus one all-zero row
+        .view(10, 256)
+        .float(),
+    )
+    embeddings_1_2 = TensorParallelEmbedding("", weights_1_2, reduce=False)
+    assert embeddings_1_2.min_id == 9
+    assert embeddings_1_2.max_id == 17
+    torch.testing.assert_close(
+        embeddings_1_2.weight,
+        torch.cat([torch.arange(8 * 256) + 9 * 256, torch.zeros(256)], dim=0)  # remaining 8 rows plus one all-zero row
+        .view(9, 256)
+        .float(),
+    )
+    output_tp_0 = embeddings_0_2.forward(input_ids)
+    output_tp_1 = embeddings_1_2.forward(input_ids)
+
+    torch.testing.assert_close(output, output_tp_0 + output_tp_1)  # unreduced shard outputs must sum to the single-rank result
diff --git a/server/tests/utils/test_tokens.py b/server/tests/utils/test_tokens.py
index 39446fd5..94d2a8f2 100644
--- a/server/tests/utils/test_tokens.py
+++ b/server/tests/utils/test_tokens.py
@@ -64,30 +64,50 @@ def test_batch_top_tokens():
     top_n_tokens = [0, 2, 3, 4, 5]
     top_n_tokens_tensor = torch.tensor(top_n_tokens)
     inp_logprobs = torch.tensor([[-1.0, -3.0, -4.0, -2.0, -3.0]] * 5)
+    accepted_ids = torch.ones_like(top_n_tokens_tensor)
 
     topn_tok_ids, topn_tok_logprobs = batch_top_tokens(
-        top_n_tokens, top_n_tokens_tensor, inp_logprobs
+        top_n_tokens, top_n_tokens_tensor, inp_logprobs, accepted_ids
     )
 
-    assert topn_tok_ids[0] == []
-    assert topn_tok_ids[1] == [0, 3]
-    assert topn_tok_ids[2] == [0, 3, 1, 4]
-    assert topn_tok_ids[3] == [0, 3, 1, 4]
-    assert topn_tok_ids[4] == [0, 3, 1, 4, 2]
+    assert topn_tok_ids[0] == [[]]
+    assert topn_tok_ids[1] == [[0, 3]]
+    assert topn_tok_ids[2] == [[0, 3, 1, 4]]
+    assert topn_tok_ids[3] == [[0, 3, 1, 4]]
+    assert topn_tok_ids[4] == [[0, 3, 1, 4, 2]]
 
-    assert topn_tok_logprobs[0] == []
-    assert topn_tok_logprobs[1] == [-1, -2]
-    assert topn_tok_logprobs[2] == [-1, -2, -3, -3]
-    assert topn_tok_logprobs[3] == [-1, -2, -3, -3]
-    assert topn_tok_logprobs[4] == [-1, -2, -3, -3, -4]
+    assert topn_tok_logprobs[0] == [[]]
+    assert topn_tok_logprobs[1] == [[-1, -2]]
+    assert topn_tok_logprobs[2] == [[-1, -2, -3, -3]]
+    assert topn_tok_logprobs[3] == [[-1, -2, -3, -3]]
+    assert topn_tok_logprobs[4] == [[-1, -2, -3, -3, -4]]
+
+    # Now let's make second member of the batch be speculated
+    inp_logprobs = torch.tensor([[-1.0, -3.0, -4.0, -2.0, -3.0]] * 5 * 2)
+    accepted_ids[1] = 2
+    topn_tok_ids, topn_tok_logprobs = batch_top_tokens(
+        top_n_tokens, top_n_tokens_tensor, inp_logprobs, accepted_ids
+    )
+
+    assert topn_tok_ids[0] == [[]]
+    assert topn_tok_ids[1] == [[0, 3], [0, 3]]
+    assert topn_tok_ids[2] == [[0, 3, 1, 4]]
+    assert topn_tok_ids[3] == [[0, 3, 1, 4]]
+    assert topn_tok_ids[4] == [[0, 3, 1, 4, 2]]
+
+    assert topn_tok_logprobs[0] == [[]]
+    assert topn_tok_logprobs[1] == [[-1, -2], [-1, -2]]
+    assert topn_tok_logprobs[2] == [[-1, -2, -3, -3]]
+    assert topn_tok_logprobs[3] == [[-1, -2, -3, -3]]
+    assert topn_tok_logprobs[4] == [[-1, -2, -3, -3, -4]]
 
 
 def test_pass_through_tokenizer(skip_tokenizer_env_var):
     tokenizer = AutoTokenizer.from_pretrained(
-            'meta-llama/Llama-2-7b-chat-hf',
-            revision=None,
-            padding_side="left",
-            truncation_side="left",
+        'meta-llama/Llama-2-7b-chat-hf',
+        revision=None,
+        padding_side="left",
+        truncation_side="left",
     )
     tokenizer.pad_token_id = 2
     make_tokenizer_optional(tokenizer)
@@ -109,4 +129,4 @@ def test_pass_through_tokenizer(skip_tokenizer_env_var):
 
 
 if __name__ == "__main__":
-    test_pass_through_tokenizer()
\ No newline at end of file
+    test_pass_through_tokenizer()
diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py
index fead2297..990c31be 100644
--- a/server/text_generation_server/cli.py
+++ b/server/text_generation_server/cli.py
@@ -10,6 +10,7 @@ from pathlib import Path
 from loguru import logger
 from typing import Optional
 from enum import Enum
+from huggingface_hub import hf_hub_download
 
 
 app = typer.Typer()
@@ -18,6 +19,9 @@ app = typer.Typer()
 class Quantization(str, Enum):
     bitsandbytes = "bitsandbytes"
     gptq = "gptq"
+    awq = "awq"
+    eetq = "eetq"
+    fp8 = "fp8"
 
 
 class Dtype(str, Enum):
@@ -31,6 +35,7 @@ def serve(
     revision: Optional[str] = None,
     sharded: bool = False,
     quantize: Optional[Quantization] = None,
+    speculate: Optional[int] = None,
     dtype: Optional[Dtype] = None,
     trust_remote_code: bool = False,
     uds_path: Path = "/tmp/text-generation-server",
@@ -39,9 +44,15 @@ def serve(
     otlp_endpoint: Optional[str] = None,
 ):
     if sharded:
-        assert os.getenv("WORLD_SIZE", None) is not None, "WORLD_SIZE must be set when sharded is True"
-        assert os.getenv("MASTER_ADDR", None) is not None, "MASTER_ADDR must be set when sharded is True"
-        assert os.getenv("MASTER_PORT", None) is not None, "MASTER_PORT must be set when sharded is True"
+        assert (
+            os.getenv("WORLD_SIZE", None) is not None
+        ), "WORLD_SIZE must be set when sharded is True"
+        assert (
+            os.getenv("MASTER_ADDR", None) is not None
+        ), "MASTER_ADDR must be set when sharded is True"
+        assert (
+            os.getenv("MASTER_PORT", None) is not None
+        ), "MASTER_PORT must be set when sharded is True"
 
     # Remove default handler
     logger.remove()
@@ -75,7 +86,11 @@ def serve(
         logger.info("CLI SHARDED = {}".format(num_shard))
         import subprocess
 
-        cmd = f"deepspeed --num_nodes 1 --num_gpus {num_shard} --no_local_rank {tgi_file} --model_id {model_id} --revision {revision} --sharded {sharded} --dtype {dtype} --uds_path {uds_path}"
+        cmd = f"deepspeed --num_nodes 1 --num_gpus {num_shard} --no_local_rank {tgi_file}"
+        cmd += f" --model_id {model_id} --revision {revision} --sharded {sharded}"
+        cmd += f" --dtype {dtype} --trust_remote_code {trust_remote_code} --uds_path {uds_path}"
+        if speculate is not None:
+            cmd += f" --speculate {speculate}"  # leading space keeps the flag from fusing onto the --uds_path value
         logger.info("CLI server start deepspeed ={} ".format(cmd))
         sys.stdout.flush()
         sys.stderr.flush()
@@ -119,7 +134,15 @@ def serve(
                 logger.error(f"{cmd}  exited with status = {proc.returncode}")
                 return proc.returncode
     else:
-        server.serve(model_id, revision, dtype, uds_path, sharded)
+        server.serve(
+            model_id,
+            revision,
+            sharded,
+            speculate,
+            dtype,
+            trust_remote_code,
+            uds_path
+        )
 
 
 @app.command()
@@ -153,7 +176,7 @@ def download_weights(
         logger.info("Files are already present on the host. " "Skipping download.")
         return
     # Local files not found
-    except (utils.LocalEntryNotFoundError, FileNotFoundError):
+    except (utils.LocalEntryNotFoundError, FileNotFoundError, utils.EntryNotFoundError):
         pass
 
     is_local_model = (Path(model_id).exists() and Path(model_id).is_dir()) or os.getenv(
@@ -161,6 +184,50 @@ def download_weights(
     ) is not None
 
     if not is_local_model:
+        try:
+            adapter_config_filename = hf_hub_download(
+                model_id, revision=revision, filename="adapter_config.json"
+            )
+            utils.download_and_unload_peft(
+                model_id, revision, trust_remote_code=trust_remote_code
+            )
+            is_local_model = True
+            utils.weight_files(model_id, revision, extension)
+            return
+        except (utils.LocalEntryNotFoundError, utils.EntryNotFoundError):
+            pass
+
+        try:
+            import json
+
+            medusa_head = hf_hub_download(
+                model_id, revision=revision, filename="medusa_lm_head.safetensors"
+            )
+            medusa_config = hf_hub_download(
+                model_id, revision=revision, filename="config.json"
+            )
+            with open(medusa_config, "r") as f:
+                config = json.load(f)
+
+            model_id = config["base_model_name_or_path"]
+            revision = "main"
+            try:
+                utils.weight_files(model_id, revision, extension)
+                logger.info(
+                    f"Files for parent {model_id} are already present on the host. "
+                    "Skipping download."
+                )
+                return
+            # Local files not found
+            except (
+                utils.LocalEntryNotFoundError,
+                FileNotFoundError,
+                utils.EntryNotFoundError,
+            ):
+                pass
+        except (utils.LocalEntryNotFoundError, utils.EntryNotFoundError):
+            pass
+
         # Try to download weights from the hub
         try:
             filenames = utils.weight_hub_files(model_id, revision, extension)
@@ -174,7 +241,32 @@ def download_weights(
             if not extension == ".safetensors" or not auto_convert:
                 raise e
 
-    else:
+    elif (Path(model_id) / "medusa_lm_head.safetensors").exists():
+        # Try to load as a local Medusa model
+        try:
+            import json
+
+            medusa_head = Path(model_id) / "medusa_lm_head.safetensors"
+            medusa_config = Path(model_id) / "config.json"
+            with open(medusa_config, "r") as f:
+                config = json.load(f)
+
+            model_id = config["base_model_name_or_path"]
+            revision = "main"
+            try:
+                utils.weight_files(model_id, revision, extension)
+                logger.info(
+                    f"Files for parent {model_id} are already present on the host. "
+                    "Skipping download."
+                )
+                return
+            # Local files not found (same exception set as the other weight_files lookups in this function)
+            except (utils.LocalEntryNotFoundError, FileNotFoundError, utils.EntryNotFoundError):
+                pass
+        except (utils.LocalEntryNotFoundError, utils.EntryNotFoundError):
+            pass
+
+    elif (Path(model_id) / "adapter_config.json").exists():
         # Try to load as a local PEFT model
         try:
             utils.download_and_unload_peft(
@@ -204,6 +296,13 @@ def download_weights(
         local_pt_files = utils.download_weights(pt_filenames, model_id, revision)
 
     if auto_convert:
+        if not trust_remote_code:  # adjacent literals below are concatenated, so each needs its own trailing space
+            logger.warning(
+                f"🚨🚨BREAKING CHANGE in 2.0🚨🚨: Safetensors conversion is disabled without `--trust-remote-code` because "
+                f"Pickle files are unsafe and can essentially contain remote code execution! "
+                f"Please check for more information here: https://huggingface.co/docs/text-generation-inference/basic_tutorials/safety",
+            )
+
         logger.warning(
             f"No safetensors weights found for model {model_id} at revision {revision}. "
             f"Converting PyTorch weights to safetensors."
diff --git a/server/text_generation_server/interceptor.py b/server/text_generation_server/interceptor.py
index cde71de3..05339282 100644
--- a/server/text_generation_server/interceptor.py
+++ b/server/text_generation_server/interceptor.py
@@ -28,6 +28,10 @@ class ExceptionInterceptor(AsyncServerInterceptor):
             method_name = method_name.split("/")[-1]
             logger.exception(f"Method {method_name} encountered an error.")
 
+            # Runtime Error cannot be recovered from
+            if isinstance(err, RuntimeError):
+                exit(1)
+
             if torch.cuda.is_available():
                 torch.cuda.empty_cache()
 
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index ce252ba1..7eb9669f 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -1,10 +1,16 @@
 import torch
 
 from loguru import logger
+from transformers.configuration_utils import PretrainedConfig
 from transformers.models.auto import modeling_auto
-from transformers import AutoConfig
+from huggingface_hub import hf_hub_download
 from typing import Optional
+from pathlib import Path
 
+# Needed to properly setup habana_frameworks
+import text_generation_server.habana_quantization_env as hq_env
+
+from text_generation_server.utils.speculate import get_speculate, set_speculate
 from text_generation_server.models.model import Model
 from text_generation_server.models.causal_lm import CausalLM
 from text_generation_server.models.bloom import BLOOM
@@ -20,19 +26,90 @@ torch.set_grad_enabled(False)
 def get_model(
     model_id: str,
     revision: Optional[str],
-    dtype: Optional[torch.dtype] = None,
+    speculate: Optional[int],
+    dtype: Optional[torch.dtype],
+    trust_remote_code: bool,
 ) -> Model:
     adapt_transformers_to_gaudi()
-    config = AutoConfig.from_pretrained(model_id, revision=revision)
-    model_type = config.model_type
+
+    if speculate is not None:
+        set_speculate(speculate)
+    else:
+        set_speculate(0)
+
+    config_dict, _ = PretrainedConfig.get_config_dict(
+        model_id, revision=revision, trust_remote_code=trust_remote_code
+    )
+
+    use_medusa = None
+    if "medusa_num_heads" in config_dict:
+        medusa_model_id = model_id
+        medusa_revision = revision
+        model_id = config_dict["base_model_name_or_path"]
+        revision = "main"
+        speculate_medusa = config_dict["medusa_num_heads"]
+        if speculate is not None:
+            if speculate > speculate_medusa:
+                raise RuntimeError(
+                    f"Speculate is set to `{speculate}` but this medusa models only has `{speculate_medusa}` heads, please make them match"
+                )
+            else:
+                set_speculate(speculate)
+        else:
+            set_speculate(speculate_medusa)
+
+        config_dict, _ = PretrainedConfig.get_config_dict(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+        is_local = Path(medusa_model_id).exists()
+        if not is_local:
+            medusa_config = hf_hub_download(
+                medusa_model_id, revision=medusa_revision, filename="config.json"
+            )
+            hf_hub_download(
+                medusa_model_id,
+                revision=medusa_revision,
+                filename="medusa_lm_head.safetensors",
+            )
+            use_medusa = Path(medusa_config).parent
+        else:
+            use_medusa = Path(medusa_model_id)
+
+        method = "medusa"
+    else:
+        method = "n-gram"
+
+    speculate = get_speculate()
+    if speculate > 0:
+        logger.info(f"Using speculation {method} with {speculate} input ids.")
+
+    model_type = config_dict["model_type"]
 
     if model_type == "gpt_bigcode":
-        return SantaCoder(model_id, revision, dtype)
+        return SantaCoder(
+            model_id,
+            revision,
+            use_medusa=use_medusa,
+            dtype=dtype,
+            trust_remote_code=trust_remote_code,
+        )
 
     if model_type == "bloom":
-        return BLOOM(model_id, revision, dtype)
+        return BLOOM(
+            model_id,
+            revision,
+            use_medusa=use_medusa,
+            dtype=dtype,
+            trust_remote_code=trust_remote_code,
+        )
 
     if model_type in modeling_auto.MODEL_FOR_CAUSAL_LM_MAPPING_NAMES:
-        return CausalLM(model_id, revision, dtype)
+        return CausalLM(
+            model_id,
+            revision,
+            use_medusa=use_medusa,
+            dtype=dtype,
+            trust_remote_code=trust_remote_code,
+        )
 
     raise ValueError(f"Unsupported model type {model_type}")
diff --git a/server/text_generation_server/models/bloom.py b/server/text_generation_server/models/bloom.py
index afcdf612..86cafda2 100644
--- a/server/text_generation_server/models/bloom.py
+++ b/server/text_generation_server/models/bloom.py
@@ -35,12 +35,16 @@ class BLOOM(CausalLM):
         self,
         model_id: str,
         revision: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
     ):
         super(BLOOM, self).__init__(
             model_id=model_id,
             revision=revision,
+            use_medusa=use_medusa,
             dtype=dtype,
+            trust_remote_code=trust_remote_code,
         )
 
     @property
diff --git a/server/text_generation_server/models/cache_manager.py b/server/text_generation_server/models/cache_manager.py
index 2e6ae086..85e1b19b 100644
--- a/server/text_generation_server/models/cache_manager.py
+++ b/server/text_generation_server/models/cache_manager.py
@@ -43,7 +43,7 @@ class CacheManager:
         ]
         self.free_block_mask = torch.ones(num_blocks, dtype=torch.int32, device="cpu")
         self.slots = torch.arange(
-            0, num_blocks * self.block_size, dtype=torch.int32
+            0, num_blocks * self.block_size, dtype=torch.int64
         ).view(num_blocks, self.block_size)
 
     def allocate(
@@ -55,9 +55,10 @@ class CacheManager:
     ):
         # Get free blocks indices by finding values in mask that are not set to 0
         free_block_indices = self.free_block_mask.nonzero()
-        assert (
-            len(free_block_indices) >= blocks
-        ), f"Out of available cache blocks: asked {blocks}, only {len(free_block_indices)} free blocks"
+        if blocks > len(free_block_indices):
+            raise RuntimeError(
+                f"Out of available cache blocks: asked {blocks}, only {len(free_block_indices)} free blocks"
+            )
 
         # Slice by the number of required blocks
         block_indices = free_block_indices[:blocks]
diff --git a/server/text_generation_server/models/causal_lm.py b/server/text_generation_server/models/causal_lm.py
index 97a9fd6f..1b03eb3e 100644
--- a/server/text_generation_server/models/causal_lm.py
+++ b/server/text_generation_server/models/causal_lm.py
@@ -7,6 +7,7 @@ import itertools
 import math
 import os
 import tempfile
+import time
 from typing import Dict, List, Optional, Tuple, Type
 
 import torch
@@ -32,12 +33,12 @@ from transformers import (
 
 from text_generation_server.utils.tokens import batch_top_tokens
 from text_generation_server.models import Model
+from text_generation_server.utils.tokens import batch_top_tokens
 from text_generation_server.models.types import (
     Batch,
-    PrefillTokens,
+    Tokens,
     Generation,
     GeneratedText,
-    TopTokens,
 )
 from text_generation_server.pb import generate_pb2
 from text_generation_server.utils import (
@@ -47,6 +48,7 @@ from text_generation_server.utils import (
     is_tokenizer_transparent,
 )
 from text_generation_server.utils.debug import dbg_trace
+from text_generation_server.utils.speculate import get_speculate
 
 tracer = trace.get_tracer(__name__)
 
@@ -404,7 +406,8 @@ class CausalLMBatch(Batch):
             parameters,
             batches[dst_batch_idx].next_token_chooser.dtype,
             batches[dst_batch_idx].next_token_chooser.device,
-            hq_env.is_quantization_enabled
+            batches[dst_batch_idx].next_token_chooser.tokenizer,
+            quantization_enabled=hq_env.is_quantization_enabled,
         )
 
         input_ids = batches[dst_batch_idx].input_ids
@@ -459,7 +462,11 @@ class CausalLMBatch(Batch):
                 parameters.append(parameters[0])
 
         next_token_chooser = HeterogeneousNextTokenChooser.from_pb(
-            parameters, dtype, device, hq_env.is_quantization_enabled
+            pb=parameters,
+            dtype=dtype,
+            device=device,
+            tokenizer=tokenizer,
+            quantization_enabled=hq_env.is_quantization_enabled,
         )
         tokenized_inputs = tokenizer(
             [r.data.inputs for r in requests] + dummy_inputs,
@@ -569,14 +576,20 @@ class CausalLM(Model):
         self,
         model_id: str,
         revision: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
     ):
+        if use_medusa:
+            raise RuntimeError("Medusa decoding is not enabled for AutoModel")
+
         # Create tokenizer
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
             revision=revision,
             padding_side="left",
             truncation_side="left",
+            trust_remote_code=trust_remote_code,
         )
         make_tokenizer_optional(tokenizer)
 
@@ -609,6 +622,7 @@ class CausalLM(Model):
                 model_id,
                 revision=revision,
                 torch_dtype=dtype,
+                trust_remote_code=trust_remote_code,
                 **model_kwargs
             )
             model = self.prepare_model_for_quantization(model)
@@ -644,6 +658,8 @@ class CausalLM(Model):
             kwargs["attn_softmax_bf16"] = True
             kwargs["trim_logits"] = True
 
+        self.speculate = get_speculate()
+
         super(CausalLM, self).__init__(
             model=model,
             tokenizer=tokenizer,
@@ -791,8 +807,8 @@ class CausalLM(Model):
         attention_mask,
         position_ids,
         token_idx,
-        past_key_values: Optional = None,
-        bypass_hpu_graph: Optional = None,
+        past_key_values: Optional[List[Tuple]] = None,
+        bypass_hpu_graph: Optional[bool] = None,
     ) -> Tuple[torch.Tensor, List[Tuple[torch.Tensor, torch.Tensor]]]:
         # Model Forward
         kwargs = {
@@ -816,7 +832,10 @@ class CausalLM(Model):
             return outputs.logits, outputs.past_key_values
 
     @tracer.start_as_current_span("generate_token")
-    def generate_token(self, batches: List[CausalLMBatch]) -> Tuple[List[Generation], Optional[CausalLMBatch]]:
+    def generate_token(
+        self, batches: List[CausalLMBatch]
+    ) -> Tuple[List[Generation], Optional[CausalLMBatch], Tuple[int, int]]:
+        start = time.time_ns()
         # Results
         generations: List[Generation] = []
         prev_batches = []
@@ -839,17 +858,20 @@ class CausalLM(Model):
                 # Select next token
                 input_length = batch.input_length
                 if logits.shape[-2] > 1:
-                    next_token_ids, next_token_logprobs, logprobs = batch.next_token_chooser(
-                        batch.input_ids, logits[:, input_length - 1: input_length, :].squeeze(-2)
+                    next_token_ids, next_token_logprobs, logprobs, _, _ = batch.next_token_chooser(
+                        batch.input_ids, logits[:, input_length - 1: input_length, :].squeeze(-2), self.speculate
                     )
                 else:
-                    next_token_ids, next_token_logprobs, logprobs = batch.next_token_chooser(
-                        batch.input_ids, logits.squeeze(-2)
+                    next_token_ids, next_token_logprobs, logprobs, _, _ = batch.next_token_chooser(
+                        batch.input_ids, logits.squeeze(-2), self.speculate
                     )
+                # Speculation is not active for causal
+                accepted_ids = torch.ones_like(batch.input_ids)[:, 0]
                 batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
                     batch.top_n_tokens,
                     batch.top_n_tokens_tensor,
                     logprobs,
+                    accepted_ids,
                 )
 
                 prev_batches.append({
@@ -934,6 +956,8 @@ class CausalLM(Model):
 
         htorch.core.mark_step()
 
+        start_decode = time.time_ns()
+
         # Stage 3. Finish and return previous generations
         stopped = len(requests_to_generate) > 0
         for prev_batch in prev_batches:
@@ -1014,33 +1038,49 @@ class CausalLM(Model):
                         clean_up_tokenization_spaces=False,
                         skip_special_tokens=False,
                     )
-                    prefill_tokens = PrefillTokens(prefill_token_ids, prefill_logprobs, prefill_texts)
+                    prefill_tokens = Tokens(
+                        prefill_token_ids,
+                        prefill_logprobs,
+                        prefill_texts,
+                        is_special=[],
+                    )
                 else:
                     prefill_tokens = None
 
                 if top_n_tokens > 0:
-                    toptoken_texts = self.tokenizer.batch_decode(
-                        top_token_ids,
-                        clean_up_tokenization_spaces=False,
-                        skip_special_tokens=False,
-                    )
-                    special_toptokens = [token_id in self.all_special_ids for token_id in top_token_ids]
-                    top_tokens = TopTokens(
-                        top_token_ids,
-                        top_token_logprobs,
-                        toptoken_texts,
-                        special_toptokens,
-                    )
+                    all_top_tokens = []
+                    for top_token_ids, top_token_logprobs in zip(
+                        top_token_ids, top_token_logprobs
+                    ):
+                        toptoken_texts = self.tokenizer.batch_decode(
+                            top_token_ids,
+                            clean_up_tokenization_spaces=False,
+                            skip_special_tokens=False,
+                        )
+                        special_toptokens = [
+                            token_id in self.all_special_ids
+                            for token_id in top_token_ids
+                        ]
+                        top_tokens = Tokens(
+                            top_token_ids,
+                            top_token_logprobs,
+                            toptoken_texts,
+                            special_toptokens,
+                        )
+                        all_top_tokens.append(top_tokens)
+                    top_tokens = all_top_tokens
                 else:
                     top_tokens = None
 
                 generation = Generation(
                     request.id,
                     prefill_tokens,
-                    next_token_id,
-                    next_token_logprob,
-                    next_token_text,
-                    next_token_id in self.all_special_ids,
+                    Tokens(
+                        [next_token_id],
+                        [next_token_logprob],
+                        [next_token_text],
+                        [next_token_id in self.all_special_ids],
+                    ),
                     generated_text,
                     top_tokens,
                 )
@@ -1059,13 +1099,16 @@ class CausalLM(Model):
                 self.hb_profiler.stop()
             else:
                 self.hb_profiler.step()
-        return generations, batch if not stopped else None
+
+        forward_ns = start_decode - start
+        decode_ns = time.time_ns() - start_decode
+        return generations, batch if not stopped else None, (forward_ns, decode_ns)
 
     def warmup(self, batches: List[CausalLMBatch]) -> None:
         # prefill
-        _, prefill_batch = self.generate_token([batches.pop(0)])
+        _, prefill_batch, _ = self.generate_token([batches.pop(0)])
         # decode
-        _, decode_batch = self.generate_token([prefill_batch])
+        _, decode_batch, _ = self.generate_token([prefill_batch])
         # shifts
         self.shifting_warmup(decode_batch)
 
@@ -1074,12 +1117,12 @@ class CausalLM(Model):
             return
 
         # prefill
-        _, prefill_batch = self.generate_token([batches.pop(0)])
+        _, prefill_batch, _ = self.generate_token([batches.pop(0)])
         # concatenate and decode
-        _, decode_batch = self.generate_token([decode_batch, prefill_batch])
+        _, decode_batch, _ = self.generate_token([decode_batch, prefill_batch])
         # decodes
         while decode_batch is not None:
-            _, decode_batch = self.generate_token([decode_batch])
+            _, decode_batch, _ = self.generate_token([decode_batch])
 
     def shifting_warmup(self, batch: CausalLMBatch) -> None:
         chunk_sizes = CHUNK_SIZES.copy()
diff --git a/server/text_generation_server/models/custom_modeling/bloom_modeling.py b/server/text_generation_server/models/custom_modeling/bloom_modeling.py
index 5423d75a..c8f02bca 100644
--- a/server/text_generation_server/models/custom_modeling/bloom_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/bloom_modeling.py
@@ -36,7 +36,7 @@ from text_generation_server.utils.layers import (
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
     TensorParallelRowLinear,
-    TensorParallelHead,
+    SpeculativeHead,
 )
 
 CUSTOM_KERNELS_ENABLED = False
@@ -820,7 +820,7 @@ class BloomForCausalLM(BloomPreTrainedModel):
         super().__init__(config)
         self.transformer = BloomModel(config, weights)
 
-        self.lm_head = TensorParallelHead.load(
+        self.lm_head = SpeculativeHead.load(
             config,
             prefix="word_embeddings",
             weights=weights,
@@ -870,7 +870,7 @@ class BloomForCausalLM(BloomPreTrainedModel):
         output_hidden_states: Optional[bool] = None,
         return_dict: Optional[bool] = None,
         **deprecated_arguments,
-    ) -> Union[Tuple[torch.Tensor], CausalLMOutputWithCrossAttentions]:
+    ) -> Union[Tuple, CausalLMOutputWithCrossAttentions]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
             Labels for language modeling. Note that the labels **are shifted** inside the model, i.e. you can set
@@ -904,17 +904,20 @@ class BloomForCausalLM(BloomPreTrainedModel):
         )
         hidden_states = transformer_outputs[0]
 
-        lm_logits = self.lm_head(hidden_states)
+        lm_logits, speculative_logits = self.lm_head(hidden_states)  # name the tuple branch reads
         loss = None
 
         if not return_dict:
             output = (lm_logits,) + transformer_outputs[1:]
             return ((loss,) + output) if loss is not None else output
 
-        return CausalLMOutputWithCrossAttentions(
-            loss=loss,
-            logits=lm_logits,
-            past_key_values=transformer_outputs.past_key_values,
-            hidden_states=transformer_outputs.hidden_states,
-            attentions=transformer_outputs.attentions,
+        return (
+            CausalLMOutputWithCrossAttentions(
+                loss=loss,
+                logits=lm_logits,
+                past_key_values=transformer_outputs.past_key_values,
+                hidden_states=transformer_outputs.hidden_states,
+                attentions=transformer_outputs.attentions,
+            ),
+            speculative_logits,
         )
diff --git a/server/text_generation_server/models/custom_modeling/clip.py b/server/text_generation_server/models/custom_modeling/clip.py
new file mode 100644
index 00000000..c4917733
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/clip.py
@@ -0,0 +1,827 @@
+from typing import Optional, Tuple, Union
+
+import torch
+from torch import nn
+
+from transformers.activations import ACT2FN
+from transformers.modeling_attn_mask_utils import (
+    _create_4d_causal_attention_mask,
+    _prepare_4d_attention_mask,
+)
+from transformers.modeling_outputs import (
+    BaseModelOutput,
+    BaseModelOutputWithPooling,
+    ImageClassifierOutput,
+)
+from transformers import CLIPConfig, CLIPTextConfig, CLIPVisionConfig
+
+from text_generation_server.utils.layers import (
+    TensorParallelEmbedding,
+    TensorParallelColumnLinear,
+    TensorParallelRowLinear,
+)
+
+
+class CLIPVisionEmbeddings(nn.Module):
+    """Class embedding plus conv patch embeddings, with positions added."""
+
+    def __init__(self, prefix, config: CLIPVisionConfig, weights):
+        super().__init__()
+        self.config = config
+        self.embed_dim = config.hidden_size
+        self.image_size = config.image_size
+        self.patch_size = config.patch_size
+
+        # TODO Should we TP this ?
+        self.class_embedding = weights.get_tensor(f"{prefix}.class_embedding")
+
+        # Kernel and stride both equal the patch size, so patches do not overlap
+        conv = nn.Conv2d(
+            in_channels=config.num_channels,
+            out_channels=self.embed_dim,
+            kernel_size=self.patch_size,
+            stride=self.patch_size,
+            bias=False,
+        )
+        kernel = weights.get_tensor(f"{prefix}.patch_embedding.weight")
+        conv.weight = nn.Parameter(kernel, requires_grad=False)
+        self.patch_embedding = conv
+
+        patches_per_side = self.image_size // self.patch_size
+        self.num_patches = patches_per_side**2
+        self.num_positions = self.num_patches + 1  # one extra slot for the class embedding
+        self.position_embedding = TensorParallelEmbedding(
+            prefix=f"{prefix}.position_embedding", weights=weights
+        )
+        ids = torch.arange(self.num_positions, device=weights.device)
+        self.register_buffer("position_ids", ids.expand((1, -1)), persistent=False)
+
+    def forward(self, pixel_values: torch.FloatTensor) -> torch.Tensor:
+        """Embed `pixel_values`: patch conv, prepend class embedding, add positions."""
+        batch_size = pixel_values.shape[0]
+        conv_dtype = self.patch_embedding.weight.dtype
+        # shape = [*, width, grid, grid]
+        patches = self.patch_embedding(pixel_values.to(dtype=conv_dtype))
+        patches = patches.flatten(2).transpose(1, 2)
+
+        cls_tokens = self.class_embedding.expand(batch_size, 1, -1)
+        tokens = torch.cat([cls_tokens, patches], dim=1)
+        return tokens + self.position_embedding(self.position_ids)
+
+
+class CLIPTextEmbeddings(nn.Module):
+    """Token embeddings plus position embeddings for the CLIP text model."""
+
+    def __init__(self, config: CLIPTextConfig):
+        super().__init__()
+        hidden = config.hidden_size
+        max_positions = config.max_position_embeddings
+
+        self.token_embedding = nn.Embedding(config.vocab_size, hidden)
+        self.position_embedding = nn.Embedding(max_positions, hidden)
+
+        # (1, max_positions) row of indices; `persistent=False` keeps it out of
+        # the module's state_dict.
+        all_positions = torch.arange(max_positions).expand((1, -1))
+        self.register_buffer("position_ids", all_positions, persistent=False)
+
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+        inputs_embeds: Optional[torch.FloatTensor] = None,
+    ) -> torch.Tensor:
+        """Sum token embeddings (or `inputs_embeds`) with position embeddings.
+
+        `position_ids` defaults to the first `seq_length` entries of the buffer.
+        """
+        if input_ids is not None:
+            seq_length = input_ids.shape[-1]
+        else:
+            seq_length = inputs_embeds.shape[-2]
+
+        if position_ids is None:
+            position_ids = self.position_ids[:, :seq_length]
+        if inputs_embeds is None:
+            inputs_embeds = self.token_embedding(input_ids)
+
+        return inputs_embeds + self.position_embedding(position_ids)
+
+
+class CLIPAttention(nn.Module):
+    """Multi-headed attention from 'Attention Is All You Need' paper"""
+
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        self.config = config
+        self.embed_dim = config.hidden_size
+        self.num_heads = config.num_attention_heads
+        self.head_size = self.embed_dim // self.num_heads
+        if self.head_size * self.num_heads != self.embed_dim:
+            raise ValueError(
+                f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim} and `num_heads`:"
+                f" {self.num_heads})."
+            )
+        # From here on num_heads/embed_dim are divided by the process-group size.
+        self.num_heads = self.num_heads // weights.process_group.size()
+        self.embed_dim = self.embed_dim // weights.process_group.size()
+        self.scale = self.head_size**-0.5
+        self.dropout = config.attention_dropout
+
+        self.qkv = TensorParallelColumnLinear.load_multi(
+            config,
+            prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
+            dim=0,
+            weights=weights,
+            bias=True,
+        )
+        self.out_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.out_proj",
+            weights=weights,
+            bias=True,
+        )
+
+    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
+        """Reshape (bsz, seq_len, heads * head_size) to (bsz, heads, seq_len, head_size)."""
+        return (
+            tensor.view(bsz, seq_len, self.num_heads, self.head_size)
+            .transpose(1, 2)
+            .contiguous()
+        )
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: Optional[torch.Tensor] = None,
+        causal_attention_mask: Optional[torch.Tensor] = None,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+        """Input shape: Batch x Time x Channel
+
+        Both masks, when given, must have size (bsz, 1, tgt_len, src_len) and are
+        added to the attention scores before the softmax. Returns
+        `(attn_output, None)`; the attention weights are never returned.
+        """
+        bsz, tgt_len, _ = hidden_states.size()
+
+        # Single projection yielding q, k and v side by side on the last dim
+        qkv = self.qkv(hidden_states)
+        chunk = self.head_size * self.num_heads
+        query_states, key_states, value_states = qkv.split([chunk] * 3, dim=2)
+        query_states = query_states * self.scale
+        key_states = self._shape(key_states, -1, bsz)
+        value_states = self._shape(value_states, -1, bsz)
+
+        proj_shape = (bsz * self.num_heads, -1, self.head_size)
+        query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape)
+        key_states = key_states.view(*proj_shape)
+        value_states = value_states.view(*proj_shape)
+
+        src_len = key_states.size(1)
+        attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))
+
+        if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
+            raise ValueError(
+                f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is"
+                f" {attn_weights.size()}"
+            )
+
+        # apply the causal_attention_mask first
+        if causal_attention_mask is not None:
+            if causal_attention_mask.size() != (bsz, 1, tgt_len, src_len):
+                raise ValueError(
+                    f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is"
+                    f" {causal_attention_mask.size()}"
+                )
+            attn_weights = (
+                attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+                + causal_attention_mask
+            )
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+        if attention_mask is not None:
+            if attention_mask.size() != (bsz, 1, tgt_len, src_len):
+                raise ValueError(
+                    f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}"
+                )
+            attn_weights = (
+                attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+                + attention_mask
+            )
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+        attn_weights = nn.functional.softmax(attn_weights, dim=-1)
+
+        attn_probs = nn.functional.dropout(
+            attn_weights, p=self.dropout, training=self.training
+        )
+
+        attn_output = torch.bmm(attn_probs, value_states)
+
+        if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_size):
+            raise ValueError(
+                f"`attn_output` should be of size {(bsz * self.num_heads, tgt_len, self.head_size)}, but is"
+                f" {attn_output.size()}"
+            )
+
+        attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_size)
+        attn_output = attn_output.transpose(1, 2)
+        attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)
+
+        attn_output = self.out_proj(attn_output)
+
+        return attn_output, None
+
+
+class CLIPMLP(nn.Module):
+    """Two-layer feed-forward block: fc1 -> `config.hidden_act` -> fc2."""
+
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        self.config = config
+        self.activation_fn = ACT2FN[config.hidden_act]
+        # Both projections are loaded with the same arguments, bias included
+        shared = dict(config=config, weights=weights, bias=True)
+        self.fc1 = TensorParallelColumnLinear.load(prefix=f"{prefix}.fc1", **shared)
+        self.fc2 = TensorParallelRowLinear.load(prefix=f"{prefix}.fc2", **shared)
+
+    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
+        """Apply fc1, the activation, then fc2."""
+        projected = self.fc1(hidden_states)
+        activated = self.activation_fn(projected)
+        return self.fc2(activated)
+
+
+class CLIPEncoderLayer(nn.Module):
+    """Pre-norm transformer layer: self-attention then MLP, each with a residual."""
+
+    def __init__(self, prefix, config: CLIPConfig, weights):
+        super().__init__()
+        self.embed_dim = config.hidden_size
+        eps = config.layer_norm_eps
+        self.self_attn = CLIPAttention(
+            prefix=f"{prefix}.self_attn", config=config, weights=weights
+        )
+        self.layer_norm1 = nn.LayerNorm.load(
+            prefix=f"{prefix}.layer_norm1", weights=weights, eps=eps
+        )
+        self.mlp = CLIPMLP(prefix=f"{prefix}.mlp", config=config, weights=weights)
+        self.layer_norm2 = nn.LayerNorm.load(
+            prefix=f"{prefix}.layer_norm2", weights=weights, eps=eps
+        )
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        attention_mask: torch.Tensor,
+        causal_attention_mask: torch.Tensor,
+    ):
+        """
+        Args:
+            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
+            attention_mask (`torch.FloatTensor`): mask added to the attention scores, of size
+                `(batch, 1, tgt_len, src_len)`.
+            causal_attention_mask (`torch.FloatTensor`): second additive mask, same size as `attention_mask`.
+
+        Returns the updated hidden states; the attention weights are discarded.
+        """
+        # Attention sub-block
+        normed = self.layer_norm1(hidden_states)
+        attn_out, _ = self.self_attn(
+            hidden_states=normed,
+            attention_mask=attention_mask,
+            causal_attention_mask=causal_attention_mask,
+        )
+        hidden_states = hidden_states + attn_out
+
+        # MLP sub-block
+        normed = self.layer_norm2(hidden_states)
+        return hidden_states + self.mlp(normed)
+
+
+class CLIPPreTrainedModel(nn.Module):
+    """
+    Holder for CLIP class-level metadata. As written here it only declares the three class attributes below;
+    it is a plain `nn.Module` and implements no weight initialization, downloading or loading itself.
+    """
+
+    config_class = CLIPConfig
+    base_model_prefix = "clip"
+    supports_gradient_checkpointing = True
+
+
+CLIP_START_DOCSTRING = r"""
+    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
+    library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
+    etc.)
+
+    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass.
+    Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
+    and behavior.
+
+    Parameters:
+        config ([`CLIPConfig`]): Model configuration class with all the parameters of the model.
+            Initializing with a config file does not load the weights associated with the model, only the
+            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights.
+"""  # NOTE(review): string constant only; no use of it is visible in this part of the file — confirm it is needed
+
+CLIP_TEXT_INPUTS_DOCSTRING = r"""
+    Args:
+        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+            Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
+            it.
+
+            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+            [`PreTrainedTokenizer.__call__`] for details.
+
+            [What are input IDs?](../glossary#input-ids)
+        attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+            - 1 for tokens that are **not masked**,
+            - 0 for tokens that are **masked**.
+
+            [What are attention masks?](../glossary#attention-mask)
+        position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
+            config.max_position_embeddings - 1]`.
+
+            [What are position IDs?](../glossary#position-ids)
+"""  # NOTE(review): likewise unused in the visible code — confirm
+
+CLIP_VISION_INPUTS_DOCSTRING = r"""
+    Args:
+        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
+            Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
+            [`AutoImageProcessor`]. See [`CLIPImageProcessor.__call__`] for details.
+"""  # NOTE(review): likewise unused in the visible code — confirm
+
+CLIP_INPUTS_DOCSTRING = r"""
+    Args:
+        input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
+            Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
+            it.
+
+            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
+            [`PreTrainedTokenizer.__call__`] for details.
+
+            [What are input IDs?](../glossary#input-ids)
+        attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
+
+            - 1 for tokens that are **not masked**,
+            - 0 for tokens that are **masked**.
+
+            [What are attention masks?](../glossary#attention-mask)
+        position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
+            config.max_position_embeddings - 1]`.
+
+            [What are position IDs?](../glossary#position-ids)
+        pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`):
+            Pixel values. Padding will be ignored by default should you provide it. Pixel values can be obtained using
+            [`AutoImageProcessor`]. See [`CLIPImageProcessor.__call__`] for details.
+        return_loss (`bool`, *optional*):
+            Whether or not to return the contrastive loss.
+"""  # NOTE(review): likewise unused in the visible code — confirm
+
+
+class CLIPEncoder(nn.Module):
+    """
+    Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a
+    [`CLIPEncoderLayer`].
+
+    Args:
+        prefix: weight-name prefix; layer `i` is built with prefix `{prefix}.layers.{i}`
+        config: CLIPConfig
+        weights: object handed unchanged to every layer (used there to load tensors)
+    """
+
+    def __init__(self, prefix, config: CLIPConfig, weights):
+        super().__init__()
+        self.config = config
+        # One CLIPEncoderLayer per hidden layer, kept in construction order
+        stack = []
+        for i in range(config.num_hidden_layers):
+            layer_prefix = f"{prefix}.layers.{i}"
+            stack.append(
+                CLIPEncoderLayer(prefix=layer_prefix, config=config, weights=weights)
+            )
+        self.layers = nn.ModuleList(stack)
+
+    def forward(
+        self,
+        inputs_embeds,
+        attention_mask: Optional[torch.Tensor] = None,
+        causal_attention_mask: Optional[torch.Tensor] = None,
+    ):
+        r"""
+        Run `inputs_embeds` through every layer in order.
+
+        Note: the masks are not `[0, 1]` padding masks of shape `(batch_size, sequence_length)`;
+        they are added to the attention scores as-is (see `Args`).
+
+        Args:
+            inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`):
+                Embedded inputs; used as the initial hidden states.
+            attention_mask (`torch.Tensor`, *optional*):
+                Passed unchanged to every layer. The attention module in this file adds it to the
+                attention scores and requires size `(batch_size, 1, tgt_len, src_len)`.
+            causal_attention_mask (`torch.Tensor`, *optional*):
+                Passed unchanged to every layer; same size and additive use as `attention_mask`.
+
+        Returns:
+            `torch.Tensor`: the hidden states produced by the last layer, or `inputs_embeds`
+            itself when there are no layers.
+        """
+
+        states = inputs_embeds
+        for layer in self.layers:
+            states = layer(
+                states,
+                attention_mask,
+                causal_attention_mask,
+            )
+
+        return states
+
+
+class CLIPTextTransformer(nn.Module):
+    def __init__(self, config: CLIPTextConfig):
+        super().__init__()
+        self.config = config
+        embed_dim = config.hidden_size
+        self.embeddings = CLIPTextEmbeddings(config)
+        self.encoder = CLIPEncoder(
+            prefix=f"{prefix}.encoder", config=config, weights=weights
+        )
+        self.final_layer_norm = nn.LayerNorm(embed_dim, eps=config.layer_norm_eps)
+
+        # For `pooled_output` computation
+        self.eos_token_id = config.eos_token_id
+
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+    ):
+        r"""
+        Returns:
+
+        """
+        if input_ids is None:
+            raise ValueError("You have to specify input_ids")
+
+        input_shape = input_ids.size()
+        input_ids = input_ids.view(-1, input_shape[-1])
+
+        hidden_states = self.embeddings(input_ids=input_ids, position_ids=position_ids)
+
+        # CLIP's text model uses causal mask, prepare it here.
+        # https://github.com/openai/CLIP/blob/cfcffb90e69f37bf2ff1e988237a0fbe41f33c04/clip/model.py#L324
+        causal_attention_mask = _create_4d_causal_attention_mask(
+            input_shape, hidden_states.dtype, device=hidden_states.device
+        )
+        # expand attention_mask
+        if attention_mask is not None:
+            # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
+            attention_mask = _prepare_4d_attention_mask(
+                attention_mask, hidden_states.dtype
+            )
+
+        encoder_outputs = self.encoder(
+            inputs_embeds=hidden_states,
+            attention_mask=attention_mask,
+            causal_attention_mask=causal_attention_mask,
+        )
+
+        last_hidden_state = encoder_outputs[0]
+        last_hidden_state = self.final_layer_norm(last_hidden_state)
+
+        if self.eos_token_id == 2:
+            # The `eos_token_id` was incorrect before PR #24773: Let's keep what have been done here.
+            # A CLIP model with such `eos_token_id` in the config can't work correctly with extra new tokens added
+            # ------------------------------------------------------------
+            # text_embeds.shape = [batch_size, sequence_length, transformer.width]
+            # take features from the eot embedding (eot_token is the highest number in each sequence)
+            # casting to torch.int for onnx compatibility: argmax doesn't support int64 inputs with opset 14
+            pooled_output = last_hidden_state[
+                torch.arange(
+                    last_hidden_state.shape[0], device=last_hidden_state.device
+                ),
+                input_ids.to(dtype=torch.int, device=last_hidden_state.device).argmax(
+                    dim=-1
+                ),
+            ]
+        else:
+            # The config gets updated `eos_token_id` from PR #24773 (so the use of exta new tokens is possible)
+            pooled_output = last_hidden_state[
+                torch.arange(
+                    last_hidden_state.shape[0], device=last_hidden_state.device
+                ),
+                # We need to get the first position of `eos_token_id` value (`pad_token_ids` might equal to `eos_token_id`)
+                (
+                    input_ids.to(dtype=torch.int, device=last_hidden_state.device)
+                    == self.eos_token_id
+                )
+                .int()
+                .argmax(dim=-1),
+            ]
+
+        return last_hidden_state
+
+
+class CLIPTextModel(CLIPPreTrainedModel):
+    config_class = CLIPTextConfig
+
+    _no_split_modules = ["CLIPTextEmbeddings", "CLIPEncoderLayer"]
+
+    def __init__(self, config: CLIPTextConfig):
+        super().__init__(config)
+        self.text_model = CLIPTextTransformer(config)
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+    ):
+        r"""
+        Returns:
+
+        Examples:
+
+        ```python
+        >>> from transformers import AutoTokenizer, CLIPTextModel
+
+        >>> model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32")
+        >>> tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")
+
+        >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt")
+
+        >>> outputs = model(**inputs)
+        >>> last_hidden_state = outputs.last_hidden_state
+        >>> pooled_output = outputs.pooler_output  # pooled (EOS token) states
+        ```"""
+        return_dict = (
+            return_dict if return_dict is not None else self.config.use_return_dict
+        )
+
+        return self.text_model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+        )
+
+
+class CLIPVisionTransformer(nn.Module):
+    def __init__(self, prefix, config: CLIPVisionConfig, weights):
+        super().__init__()
+        self.config = config
+        embed_dim = config.hidden_size
+
+        self.embeddings = CLIPVisionEmbeddings(
+            prefix=f"{prefix}.embeddings", config=config, weights=weights
+        )
+        self.pre_layrnorm = nn.LayerNorm.load(
+            prefix=f"{prefix}.pre_layrnorm", weights=weights, eps=config.layer_norm_eps
+        )
+        self.encoder = CLIPEncoder(
+            prefix=f"{prefix}.encoder", config=config, weights=weights
+        )
+        # self.post_layernorm = nn.LayerNorm.load(prefix=f"{prefix}.post_layernorm", weights=weights, eps=config.layer_norm_eps)
+
+    def forward(
+        self,
+        pixel_values: Optional[torch.FloatTensor] = None,
+    ):
+        r"""
+        Returns:
+
+        """
+        if pixel_values is None:
+            raise ValueError("You have to specify pixel_values")
+
+        hidden_states = self.embeddings(pixel_values)
+        hidden_states = self.pre_layrnorm(hidden_states)
+
+        encoder_outputs = self.encoder(
+            inputs_embeds=hidden_states,
+        )
+        last_hidden_state = encoder_outputs
+        # pooled_output = last_hidden_state[:, 0, :]
+        # pooled_output = self.post_layernorm(pooled_output)
+
+        return BaseModelOutputWithPooling(
+            last_hidden_state=last_hidden_state,
+            # pooler_output=pooled_output,
+            # hidden_states=encoder_outputs,
+        )
+
+
+class CLIPVisionModel(CLIPPreTrainedModel):
+    config_class = CLIPVisionConfig
+    main_input_name = "pixel_values"
+    _no_split_modules = ["CLIPEncoderLayer"]
+
+    def __init__(self, config: CLIPVisionConfig):
+        super().__init__(config)
+        self.vision_model = CLIPVisionTransformer(config)
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def get_input_embeddings(self) -> nn.Module:
+        return self.vision_model.embeddings.patch_embedding
+
+    def forward(
+        self,
+        pixel_values: Optional[torch.FloatTensor] = None,
+    ):
+        r"""
+        Returns:
+
+        Examples:
+
+        ```python
+        >>> from PIL import Image
+        >>> import requests
+        >>> from transformers import AutoProcessor, CLIPVisionModel
+
+        >>> model = CLIPVisionModel.from_pretrained("openai/clip-vit-base-patch32")
+        >>> processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        >>> image = Image.open(requests.get(url, stream=True).raw)
+
+        >>> inputs = processor(images=image, return_tensors="pt")
+
+        >>> outputs = model(**inputs)
+        >>> last_hidden_state = outputs.last_hidden_state
+        >>> pooled_output = outputs.pooler_output  # pooled CLS states
+        ```"""
+        return_dict = (
+            return_dict if return_dict is not None else self.config.use_return_dict
+        )
+
+        return self.vision_model(
+            pixel_values=pixel_values,
+        )
+
+
+class CLIPModel(nn.Module):
+    def __init__(self, prefix, config: CLIPConfig, weights):
+        super().__init__()
+        text_config = config.text_config
+        vision_config = config.vision_config
+
+        self.projection_dim = config.projection_dim
+        self.text_embed_dim = text_config.hidden_size
+        self.vision_embed_dim = vision_config.hidden_size
+
+        self.text_model = CLIPTextTransformer(text_config)
+        self.vision_model = CLIPVisionTransformer(vision_config)
+
+        self.visual_projection = nn.Linear(
+            self.vision_embed_dim, self.projection_dim, bias=False
+        )
+        self.text_projection = nn.Linear(
+            self.text_embed_dim, self.projection_dim, bias=False
+        )
+        self.logit_scale = nn.Parameter(
+            torch.tensor(self.config.logit_scale_init_value)
+        )
+
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def get_text_features(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+    ) -> torch.FloatTensor:
+        r"""
+        Returns:
+            text_features (`torch.FloatTensor` of shape `(batch_size, output_dim`): The text embeddings obtained by
+            applying the projection layer to the pooled output of [`CLIPTextModel`].
+
+        Examples:
+
+        ```python
+        >>> from transformers import AutoTokenizer, CLIPModel
+
+        >>> model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+        >>> tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-base-patch32")
+
+        >>> inputs = tokenizer(["a photo of a cat", "a photo of a dog"], padding=True, return_tensors="pt")
+        >>> text_features = model.get_text_features(**inputs)
+        ```"""
+        text_outputs = self.text_model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+        )
+
+        pooled_output = text_outputs[1]
+        text_features = self.text_projection(pooled_output)
+
+        return text_features
+
+    def get_image_features(
+        self,
+        pixel_values: Optional[torch.FloatTensor] = None,
+    ) -> torch.FloatTensor:
+        r"""
+        Returns:
+            image_features (`torch.FloatTensor` of shape `(batch_size, output_dim`): The image embeddings obtained by
+            applying the projection layer to the pooled output of [`CLIPVisionModel`].
+
+        Examples:
+
+        ```python
+        >>> from PIL import Image
+        >>> import requests
+        >>> from transformers import AutoProcessor, CLIPModel
+
+        >>> model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+        >>> processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        >>> image = Image.open(requests.get(url, stream=True).raw)
+
+        >>> inputs = processor(images=image, return_tensors="pt")
+
+        >>> image_features = model.get_image_features(**inputs)
+        ```"""
+        # Use CLIP model's config for some fields (if specified) instead of those of vision & text components.
+        vision_outputs = self.vision_model(
+            pixel_values=pixel_values,
+        )
+
+        pooled_output = vision_outputs[1]  # pooled_output
+        image_features = self.visual_projection(pooled_output)
+
+        return image_features
+
+    def forward(
+        self,
+        input_ids: Optional[torch.LongTensor] = None,
+        pixel_values: Optional[torch.FloatTensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.LongTensor] = None,
+    ):
+        r"""
+        Returns:
+
+        Examples:
+
+        ```python
+        >>> from PIL import Image
+        >>> import requests
+        >>> from transformers import AutoProcessor, CLIPModel
+
+        >>> model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+        >>> processor = AutoProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+        >>> image = Image.open(requests.get(url, stream=True).raw)
+
+        >>> inputs = processor(
+        ...     text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True
+        ... )
+
+        >>> outputs = model(**inputs)
+        >>> logits_per_image = outputs.logits_per_image  # this is the image-text similarity score
+        >>> probs = logits_per_image.softmax(dim=1)  # we can take the softmax to get the label probabilities
+        ```"""
+        # Use CLIP model's config for some fields (if specified) instead of those of vision & text components.
+        vision_outputs = self.vision_model(
+            pixel_values=pixel_values,
+            return_dict=return_dict,
+        )
+
+        text_outputs = self.text_model(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            position_ids=position_ids,
+            return_dict=return_dict,
+        )
+
+        image_embeds = vision_outputs[1]
+        image_embeds = self.visual_projection(image_embeds)
+
+        text_embeds = text_outputs[1]
+        text_embeds = self.text_projection(text_embeds)
+
+        # normalized features
+        image_embeds = image_embeds / image_embeds.norm(p=2, dim=-1, keepdim=True)
+        text_embeds = text_embeds / text_embeds.norm(p=2, dim=-1, keepdim=True)
+
+        # cosine similarity as logits
+        logit_scale = self.logit_scale.exp()
+        logits_per_text = torch.matmul(text_embeds, image_embeds.t()) * logit_scale
+        logits_per_image = logits_per_text.t()
+
+        return logits_per_image, logits_per_text
diff --git a/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py b/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
new file mode 100644
index 00000000..56d9a966
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/flash_cohere_modeling.py
@@ -0,0 +1,525 @@
+# coding=utf-8
+# Copyright 2024 Cohere team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch.distributed
+
+from torch import nn
+from transformers.activations import ACT2FN
+from typing import Optional, List, Tuple
+
+from text_generation_server.utils import paged_attention, flash_attn
+from text_generation_server.utils.import_utils import IS_ROCM_SYSTEM, IS_CUDA_SYSTEM
+from text_generation_server.utils.layers import (
+    TensorParallelRowLinear,
+    TensorParallelColumnLinear,
+    TensorParallelEmbedding,
+    PositionRotaryEmbedding,
+    SpeculativeHead,
+    get_linear,
+    FastLayerNorm,
+)
+
+if IS_CUDA_SYSTEM:
+    import dropout_layer_norm
+else:
+    dropout_layer_norm = None
+
+
+class CohereRotary(PositionRotaryEmbedding):
+    def forward(
+        self,
+        query: torch.Tensor,
+        key: torch.Tensor,
+        cos: torch.Tensor,
+        sin: torch.Tensor,
+    ):
+        # Such controlflows may add some overhead.
+        if IS_CUDA_SYSTEM:
+            import rotary_emb
+
+            q1 = query[..., ::2]
+            q2 = query[..., 1::2]
+
+            rotary_emb.apply_rotary(q1, q2, cos, sin, q1, q2, False)
+
+            k1 = key[..., ::2]
+            k2 = key[..., 1::2]
+
+            rotary_emb.apply_rotary(k1, k2, cos, sin, k1, k2, False)
+        elif IS_ROCM_SYSTEM:
+            from vllm import pos_encoding_ops
+
+            # NOTE: On RoCm systems, we use a ROPE implementatation adapted from VLLM which launches a single kernel for both query/key, contrary to flash-attn implementation used on NVIDIA systems.
+            # Compiling flash-attn rotary on RoCm, it appears hipcc is unable to unroll loops, resulting in an even slower inference compared to eager: https://github.com/pytorch/pytorch/issues/113773
+
+            head_size = query.shape[-1]
+
+            # Inplace operation, updating query and key.
+            pos_encoding_ops.rotary_embedding(query, key, head_size, cos, sin, False)
+        else:
+            raise ValueError(
+                "Your system seem to be not supported. Please check your install or open an issue at https://github.com/huggingface/text-generation-inference/issues with a clear reproduction."
+            )
+
+
+class CohereLayerNorm(nn.Module):  # Bias-free LayerNorm whose scale is a 2-D weight (see the views in `forward`)
+    def __init__(self, prefix, weights, eps):
+        super().__init__()
+        weight = weights.get_sharded(f"{prefix}.weight", dim=0)  # presumably this rank's slice along dim 0
+        self.weight = nn.Parameter(weight)
+        # All-ones vector handed to the fused kernel in the scale position; `self.weight` is applied afterwards.
+        self.ones = weight.new_ones(weight.shape[1])
+        self.eps = eps
+
+    def forward(self, hidden_states):
+        if hidden_states.shape[-1] > 8192 or IS_ROCM_SYSTEM:  # eager path, bypasses `dropout_layer_norm`
+            hidden_states = hidden_states.reshape(
+                -1, self.weight.shape[0], self.weight.shape[1]
+            )
+            input_dtype = hidden_states.dtype
+            hidden_states = hidden_states.to(torch.float32)  # statistics are computed in float32
+            mean = hidden_states.mean(-1, keepdim=True)
+            hidden_states_minus_mean = hidden_states - mean
+            variance = hidden_states_minus_mean.pow(2).mean(-1, keepdim=True)  # mean of squared deviations
+            hidden_states = hidden_states_minus_mean * torch.rsqrt(variance + self.eps)
+            hidden_states = self.weight.to(torch.float32) * hidden_states
+            hidden_states = hidden_states.view(-1, self.weight.shape[1])
+            return hidden_states.to(input_dtype)  # cast back to the dtype the caller passed in
+
+        (
+            hidden_states,
+            *rest,
+        ) = dropout_layer_norm.dropout_add_ln_fwd(  # NOTE(review): all arguments are positional - confirm order against the extension
+            hidden_states,
+            None,
+            self.ones,  # all-ones stand-in, so the kernel output is not yet scaled by `self.weight`
+            None,
+            None,
+            None,
+            None,
+            None,
+            0.0,
+            self.eps,
+            1.0,
+            0,
+            None,
+            False,
+            False,
+        )
+
+        # Required to apply one weight matrix per head
+        hidden_states = hidden_states.view(
+            -1, self.weight.shape[0], self.weight.shape[1]
+        )
+        hidden_states = self.weight * hidden_states
+        hidden_states = hidden_states.view(-1, self.weight.shape[1])  # flatten back to rows of `weight.shape[1]` features
+
+        return hidden_states
+
+
+def load_attention(config, prefix, weights):
+    if config.num_attention_heads != config.num_key_value_heads:
+        return _load_gqa(config, prefix, weights)
+    else:
+        return TensorParallelColumnLinear.load_multi(
+            config,
+            prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
+            dim=0,
+            weights=weights,
+            bias=config.attention_bias,
+        )
+
+
+def _load_gqa(config, prefix: str, weights):
+    assert config.hidden_size % config.num_attention_heads == 0
+    assert config.num_attention_heads % weights.process_group.size() == 0
+
+    weight = weights.get_multi_weights_col(
+        prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
+        quantize=config.quantize,
+        dim=0,
+    )
+
+    if config.quantize not in ["gptq", "awq"]:
+        weight = weight.to(dtype=weights.dtype).to(device=weights.device)
+
+        head_size = config.hidden_size // config.num_attention_heads
+        num_heads = config.num_attention_heads // weights.process_group.size()
+        num_key_value_heads = config.num_key_value_heads // weights.process_group.size()
+        assert list(weight.shape) == [
+            (num_heads + 2 * num_key_value_heads) * head_size,
+            config.hidden_size,
+        ], f"{list(weight.shape)} != {[(num_heads + 2 * config.num_key_value_heads) * head_size, config.hidden_size]}"
+
+    if config.attention_bias:
+        w = [
+            weights.get_sharded(f"{p}.bias", dim=0)
+            for p in [f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"]
+        ]
+        bias = torch.cat(w, dim=0).to(dtype=weights.dtype).to(device=weights.device)
+    else:
+        bias = None
+
+    return TensorParallelColumnLinear(
+        get_linear(weight, bias=bias, quantize=config.quantize)
+    )
+
+
+class FlashCohereAttention(torch.nn.Module):  # Cohere self-attention: fused QKV, optional QK-norm, rotary, flash/paged kernels
+    def __init__(
+        self,
+        prefix: str,
+        config,
+        weights,
+    ):
+        super().__init__()
+        self.num_heads = config.num_attention_heads
+        self.hidden_size = config.hidden_size
+        self.head_size = self.hidden_size // self.num_heads  # computed from the unsharded head count
+
+        self.rotary_emb = CohereRotary.static(
+            config=config,
+            dim=self.head_size,
+            base=config.rope_theta,
+            device=weights.device,
+        )
+
+        self.softmax_scale = self.head_size**-0.5  # 1 / sqrt(head_size)
+
+        if self.num_heads % weights.process_group.size() != 0:
+            raise ValueError(
+                f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
+                f"and `num_shards`: {weights.process_group.size()}"
+            )
+        self.num_heads = self.num_heads // weights.process_group.size()  # per-shard count from here on
+        self.num_key_value_heads = (
+            config.num_key_value_heads // weights.process_group.size()
+        )
+
+        self.query_key_value = load_attention(config, prefix, weights)  # one fused q/k/v projection
+
+        self.use_qk_norm = config.use_qk_norm
+        if self.use_qk_norm:
+            self.q_norm = CohereLayerNorm(
+                prefix=f"{prefix}.q_norm",
+                weights=weights,
+                eps=config.layer_norm_eps,
+            )
+            self.k_norm = CohereLayerNorm(
+                prefix=f"{prefix}.k_norm",
+                weights=weights,
+                eps=config.layer_norm_eps,
+            )
+        else:
+            self.q_norm = None
+            self.k_norm = None
+
+        self.o_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.o_proj",
+            weights=weights,
+            bias=config.attention_bias,
+        )
+        self.num_groups = self.num_heads // self.num_key_value_heads  # local query heads per local KV head
+        self.kv_head_mapping = torch.arange(
+            0, self.num_key_value_heads, dtype=torch.int32, device=weights.device
+        ).repeat_interleave(self.num_groups)  # entry i = index of the KV head paired with local query head i
+
+    def forward(
+        self,
+        hidden_states,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        qkv = self.query_key_value(hidden_states)
+        query, key, value = qkv.split(  # sizes follow the q, k, v order of the fused projection
+            [
+                self.head_size * self.num_heads,
+                self.head_size * self.num_key_value_heads,
+                self.head_size * self.num_key_value_heads,
+            ],
+            dim=1,
+        )
+        # Optional QK-norm: query and key are flattened to rows of `head_size` features before normalising.
+        if self.use_qk_norm:
+            query = query.reshape(-1, self.head_size)
+            key = key.reshape(-1, self.head_size)
+            query = self.q_norm(query.contiguous())
+            key = self.k_norm(key.contiguous())
+
+        query = query.view(-1, self.num_heads, self.head_size)
+        key = key.view(-1, self.num_key_value_heads, self.head_size)
+        value = value.view(-1, self.num_key_value_heads, self.head_size)
+
+        self.rotary_emb(query, key, cos, sin)  # return value unused: `CohereRotary.forward` updates query/key
+
+        paged_attention.reshape_and_cache(key, value, kv_cache[0], kv_cache[1], slots)  # presumably stores key/value in the cache at `slots`
+
+        # output tensor
+        attn_output = torch.empty_like(query)
+
+        # Prefill
+        if cu_seqlen_prefill is not None:
+            # flash attention
+            flash_attn.attention(
+                query,
+                key,
+                value,
+                attn_output,
+                cu_seqlen_prefill,
+                max_s,
+                self.softmax_scale,
+            )
+        # Decode
+        else:
+            paged_attention.attention(
+                attn_output,
+                query,
+                kv_cache[0],
+                kv_cache[1],
+                self.kv_head_mapping,
+                self.softmax_scale,
+                block_tables,
+                input_lengths,
+                max_s,
+            )
+        # `reduce=False`: presumably skips the reduction here; `FlashCohereLayer` all-reduces the summed output.
+        return self.o_proj(
+            attn_output.view(-1, self.num_heads * self.head_size), reduce=False
+        )
+
+
+class CohereMLP(nn.Module):
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        act = config.hidden_act
+        self.act = (
+            ACT2FN[act]
+            if "gelu" not in act
+            else lambda x: torch.nn.functional.gelu(
+                x,
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
+            )
+        )
+        # Fuse gate and up proj
+        self.gate_up_proj = TensorParallelColumnLinear.load_multi(
+            config,
+            prefixes=[f"{prefix}.gate_proj", f"{prefix}.up_proj"],
+            weights=weights,
+            dim=0,
+            bias=False,
+        )
+        self.down_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.down_proj",
+            weights=weights,
+            bias=False,
+        )
+        self.intermediate_size = (
+            config.intermediate_size // weights.process_group.size()
+        )
+
+    def forward(self, hidden_states):
+        gate_up_states = self.gate_up_proj(hidden_states)
+        gate_up_states = gate_up_states.view(-1, 2, self.intermediate_size)
+        return self.down_proj(
+            self.act(gate_up_states[:, 0]) * gate_up_states[:, 1], reduce=False
+        )
+
+
+class FlashCohereLayer(nn.Module):
+    def __init__(self, layer_id, config, weights):
+        super().__init__()
+        prefix = f"model.layers.{layer_id}"
+        self.self_attn = FlashCohereAttention(
+            prefix=f"{prefix}.self_attn", config=config, weights=weights
+        )
+        self.mlp = CohereMLP(prefix=f"{prefix}.mlp", config=config, weights=weights)
+
+        self.input_layernorm = FastLayerNorm.load_no_bias(
+            prefix=f"{prefix}.input_layernorm",
+            weights=weights,
+            eps=config.layer_norm_eps,
+        )
+        self.process_group = weights.process_group
+
+    def forward(
+        self,
+        hidden_states,
+        residual,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        normed_hidden_states, res = self.input_layernorm(hidden_states, residual)
+
+        # Self Attention
+        attn_output = self.self_attn(
+            normed_hidden_states,
+            cos,
+            sin,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+
+        mlp_output = self.mlp(normed_hidden_states)
+        output = attn_output + mlp_output
+
+        if self.process_group.size() > 1:
+            torch.distributed.all_reduce(output, group=self.process_group)
+
+        return output, res
+
+
+class FlashCohereModel(torch.nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+
+        process_group = weights.process_group
+        self.tp_rank = process_group.rank()
+        self.tp_world_size = process_group.size()
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix="model.embed_tokens", weights=weights
+        )
+        self.layers = nn.ModuleList(
+            [
+                FlashCohereLayer(
+                    layer_id,
+                    config,
+                    weights,
+                )
+                for layer_id in range(config.num_hidden_layers)
+            ]
+        )
+        self.norm = FastLayerNorm.load_no_bias(
+            prefix="model.norm", weights=weights, eps=config.layer_norm_eps
+        )
+
+        self.gradient_checkpointing = False
+
+        self.head_size = self.layers[0].self_attn.head_size
+        self.num_heads = self.layers[0].self_attn.num_heads
+        self.num_key_value_heads = self.layers[0].self_attn.num_key_value_heads
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+    ) -> torch.Tensor:
+        hidden_states = self.embed_tokens(input_ids)
+
+        # Get rotary cos and sin for this forward
+        # Avoid to index in each layer
+        cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
+            position_ids, max_s, hidden_states.dtype
+        )
+
+        residual = None
+        for i, layer in enumerate(self.layers):
+            hidden_states, residual = layer(
+                hidden_states,
+                residual,
+                cos,
+                sin,
+                cu_seqlen_prefill,
+                kv_cache[i],
+                block_tables,
+                slots,
+                input_lengths,
+                max_s,
+            )
+
+        hidden_states, _ = self.norm(hidden_states, residual)
+
+        return hidden_states
+
+
+class FlashCohereForCausalLM(torch.nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+
+        self.model = FlashCohereModel(config, weights)
+        try:
+            self.lm_head = SpeculativeHead.load(
+                config,
+                prefix="lm_head",
+                weights=weights,
+            )
+        except RuntimeError:
+            self.lm_head = SpeculativeHead.load(
+                config,
+                prefix="model.embed_tokens",
+                weights=weights,
+            )
+        self.logit_scale = config.logit_scale
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        lm_head_indices: Optional[torch.Tensor] = None,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+        hidden_states = self.model(
+            input_ids,
+            position_ids,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+        if lm_head_indices is not None:
+            hidden_states = hidden_states[lm_head_indices]
+        logits, speculative_logits = self.lm_head(hidden_states)
+        logits *= self.logit_scale
+        if speculative_logits is not None:
+            speculative_logits *= self.logit_scale
+        return logits, speculative_logits
diff --git a/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py b/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py
new file mode 100644
index 00000000..d04ce39e
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/flash_dbrx_modeling.py
@@ -0,0 +1,833 @@
+# coding=utf-8
+# Copyright 2022 HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch.distributed
+
+from torch import nn
+from transformers.activations import ACT2FN
+from transformers.configuration_utils import PretrainedConfig
+from typing import Optional, List, Tuple, Any
+from loguru import logger
+
+from vllm.model_executor.layers.fused_moe import fused_moe
+from text_generation_server.utils import paged_attention, flash_attn
+from text_generation_server.utils.layers import (
+    FastLinear,
+    FastLayerNorm,
+    TensorParallelRowLinear,
+    TensorParallelColumnLinear,
+    TensorParallelEmbedding,
+    PositionRotaryEmbedding,
+    SpeculativeHead,
+    get_linear,
+)
+from text_generation_server.utils.log import log_once
+
+
+class DbrxAttentionConfig(PretrainedConfig):
+    def __init__(
+        self,
+        attn_pdrop: float = 0,
+        clip_qkv: Optional[float] = None,
+        kv_n_heads: int = 1,
+        rope_theta: float = 10000.0,
+        **kwargs: Any,
+    ):
+        super().__init__(**kwargs)
+        self.attn_pdrop = attn_pdrop
+        self.clip_qkv = clip_qkv
+        self.kv_n_heads = kv_n_heads
+        self.rope_theta = rope_theta
+
+        # Strict schema: besides "model_type", every leftover keyword is treated
+        # as an unknown option and rejected instead of being silently ignored.
+        kwargs.pop("model_type", None)
+        if kwargs:
+            raise ValueError(f"Found unknown {kwargs=}")
+
+
+class DbrxFFNConfig(PretrainedConfig):
+    """Feed-forward / mixture-of-experts settings of a Dbrx model."""
+    def __init__(
+        self,
+        ffn_act_fn: Optional[dict] = None,
+        ffn_hidden_size: int = 3584,
+        moe_num_experts: int = 4,
+        moe_top_k: int = 1,
+        moe_jitter_eps: Optional[float] = None,
+        moe_loss_weight: float = 0.01,
+        moe_normalize_expert_weights: Optional[float] = 1,
+        uniform_expert_assignment: bool = False,
+        **kwargs: Any,
+    ):
+        super().__init__()
+        # A missing activation spec defaults to SiLU.
+        self.ffn_act_fn = {"name": "silu"} if ffn_act_fn is None else ffn_act_fn
+        self.ffn_hidden_size = ffn_hidden_size
+        self.moe_num_experts = moe_num_experts
+        self.moe_top_k = moe_top_k
+        self.moe_jitter_eps = moe_jitter_eps
+        self.moe_loss_weight = moe_loss_weight
+        self.moe_normalize_expert_weights = moe_normalize_expert_weights
+        self.uniform_expert_assignment = uniform_expert_assignment
+
+        if uniform_expert_assignment:
+            raise ValueError("`uniform_expert_assignment = True` is not supported")
+
+        # Strict schema: **kwargs is not forwarded to the base class here, and
+        # apart from "model_type" any extra keyword is reported as an error.
+        kwargs.pop("model_type", None)
+        if kwargs:
+            raise ValueError(f"Found unknown {kwargs=}")
+
+
+class DbrxConfig(PretrainedConfig):
+    """Top-level Dbrx configuration; owns the attention and FFN sub-configs."""
+
+    def __init__(
+        self,
+        d_model: int = 2048,
+        n_heads: int = 16,
+        n_layers: int = 24,
+        max_seq_len: int = 2048,
+        vocab_size: int = 32000,
+        resid_pdrop: float = 0.0,
+        emb_pdrop: float = 0.0,
+        attn_config: Optional[DbrxAttentionConfig] = None,
+        ffn_config: Optional[DbrxFFNConfig] = None,
+        use_cache: bool = True,
+        initializer_range: float = 0.02,
+        output_router_logits: bool = False,
+        router_aux_loss_coef: float = 0.05,
+        **kwargs: Any,
+    ):
+        # Sub-configs may be None (defaults), a dict of constructor kwargs, or a ready object.
+        if isinstance(attn_config, dict):
+            attn_config = DbrxAttentionConfig(**attn_config)
+        elif attn_config is None:
+            attn_config = DbrxAttentionConfig()
+        self.attn_config = attn_config
+
+        if isinstance(ffn_config, dict):
+            ffn_config = DbrxFFNConfig(**ffn_config)
+        elif ffn_config is None:
+            ffn_config = DbrxFFNConfig()
+        self.ffn_config = ffn_config
+
+        self.d_model = d_model
+        self.n_heads = n_heads
+        self.n_layers = n_layers
+        self.max_seq_len = max_seq_len
+        self.vocab_size = vocab_size
+        self.resid_pdrop = resid_pdrop
+        self.emb_pdrop = emb_pdrop
+        self.use_cache = use_cache
+        self.initializer_range = initializer_range
+        self.output_router_logits = output_router_logits
+        self.router_aux_loss_coef = router_aux_loss_coef
+
+        # Tied embeddings are rejected outright; the popped (falsy) value is
+        # still passed on explicitly to the base-class constructor.
+        tie_word_embeddings = kwargs.pop("tie_word_embeddings", False)
+        if tie_word_embeddings:
+            raise ValueError("tie_word_embeddings is not supported for Dbrx models.")
+
+        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)
+
+
+def promote_scalar(x: torch.Tensor) -> torch.Tensor:  # 0-d tensors become shape (1,); others pass through
+    return x if x.dim() != 0 else x.view(1)
+
+
+def load_attention(config, prefix, weights):
+    # Differing query / key-value head counts need the manual GQA slicing path.
+    if config.n_heads != config.attn_config.kv_n_heads:
+        return _load_gqa(config, prefix, weights)
+    return TensorParallelColumnLinear.load_qkv(
+        config,
+        prefix=f"{prefix}.Wqkv",
+        weights=weights,
+        bias=False,
+    )
+
+
+def _load_gqa(config, prefix: str, weights):
+    """Load this rank's shard of the fused `Wqkv` projection when n_heads != kv_n_heads.
+
+    The fused output axis is laid out as [Q | K | V]; each rank takes its own
+    contiguous block from each of the three parts and concatenates them.
+    """
+    world_size = weights.process_group.size()
+    rank = weights.process_group.rank()
+    assert config.d_model % config.n_heads == 0
+    assert config.n_heads % world_size == 0
+    # Otherwise kv_block_size below is truncated and the K/V slices miss rows.
+    assert config.attn_config.kv_n_heads % world_size == 0
+
+    head_dim = config.d_model // config.n_heads
+    q_block_size = config.d_model // world_size
+    q_start = rank * q_block_size
+    q_stop = (rank + 1) * q_block_size
+
+    kv_block_size = (config.attn_config.kv_n_heads * head_dim) // world_size
+    k_offset = config.d_model
+    k_start = k_offset + rank * kv_block_size
+    k_stop = k_offset + (rank + 1) * kv_block_size
+
+    v_offset = config.d_model + config.attn_config.kv_n_heads * head_dim
+    v_start = v_offset + rank * kv_block_size
+    v_stop = v_offset + (rank + 1) * kv_block_size
+
+    if config.quantize in ["gptq", "awq"]:
+        try:
+            qweight_slice = weights._get_slice(f"{prefix}.Wqkv.qweight")
+            q_qweight = qweight_slice[:, q_start:q_stop]
+            k_qweight = qweight_slice[:, k_start:k_stop]
+            v_qweight = qweight_slice[:, v_start:v_stop]
+
+            qweight = torch.cat([q_qweight, k_qweight, v_qweight], dim=1)
+        except RuntimeError:
+            raise RuntimeError(
+                f"Cannot load `{config.quantize}` weight, make sure the model is already quantized"
+            )
+        # NOTE(review): these offsets assume unpacked columns; confirm for qzeros / AWQ qweight.
+        qzeros_slice = weights._get_slice(f"{prefix}.Wqkv.qzeros")
+        q_qzeros = qzeros_slice[:, q_start:q_stop]
+        k_qzeros = qzeros_slice[:, k_start:k_stop]
+        v_qzeros = qzeros_slice[:, v_start:v_stop]
+
+        qzeros = torch.cat([q_qzeros, k_qzeros, v_qzeros], dim=1)
+
+        scales_slice = weights._get_slice(f"{prefix}.Wqkv.scales")
+        q_scales = scales_slice[:, q_start:q_stop]
+        k_scales = scales_slice[:, k_start:k_stop]
+        v_scales = scales_slice[:, v_start:v_stop]
+
+        scales = torch.cat([q_scales, k_scales, v_scales], dim=1)
+
+        bits, groupsize, desc_act, quant_method = weights._get_gptq_params()
+
+        from text_generation_server.utils.layers import HAS_EXLLAMA
+
+        use_exllama = (
+            bits == 4 and HAS_EXLLAMA and config.quantize == "gptq" and not desc_act
+        )
+
+        if config.quantize == "gptq" and quant_method == "gptq":
+            # g_idx is a 1-D per-input-row index (compare the arange built in the
+            # AWQ branch below), so it is not column-sharded: load it whole.
+            g_idx = weights._get_slice(f"{prefix}.Wqkv.g_idx")[:]
+        elif config.quantize == "gptq" and quant_method == "awq":
+            log_once(
+                logger.info, "Converting AWQ model to Exllama/GPTQ packing format."
+            )
+            from text_generation_server.utils.awq.conversion_utils import (
+                fast_awq_to_gptq,
+            )
+
+            qweight, qzeros = fast_awq_to_gptq(qweight, qzeros)
+            if use_exllama:
+                g_idx = None
+            else:
+                g_idx = (
+                    torch.arange(qweight.shape[0] * (32 // bits), device=qweight.device)
+                    // groupsize
+                ).to(dtype=torch.int32)
+        else:
+            g_idx = None
+
+        weight = (qweight, qzeros, scales, g_idx, bits, groupsize, use_exllama)
+    else:
+        qkv_slice = weights._get_slice(f"{prefix}.Wqkv.weight")
+        q = qkv_slice[q_start:q_stop]
+        k = qkv_slice[k_start:k_stop]
+        v = qkv_slice[v_start:v_stop]
+
+        weight = torch.cat([q, k, v], dim=0)
+        weight = weight.to(dtype=weights.dtype).to(device=weights.device)
+
+    return TensorParallelColumnLinear(
+        get_linear(weight, bias=None, quantize=config.quantize)
+    )
+
+
+def _load_experts(config, prefix, weights):
+    """Gather this rank's rows of every expert from the stacked tensor at `prefix`.
+
+    Experts are stacked along dim 0, `ffn_hidden_size` rows each; the result is
+    `(moe_num_experts * ffn_hidden_size // world_size, d_model)`.
+    """
+    ffn_cfg = config.ffn_config
+    group = weights.process_group
+    n_shards, shard_id = group.size(), group.rank()
+
+    assert (
+        config.ffn_config.ffn_hidden_size % n_shards == 0
+    ), f"The chosen size {config.ffn_config.ffn_hidden_size} is not compatible with sharding on {n_shards} shards"
+
+    rows_per_expert = ffn_cfg.ffn_hidden_size
+    rows_per_shard = rows_per_expert // n_shards
+    packed = torch.empty(
+        (ffn_cfg.moe_num_experts * rows_per_shard, config.d_model),
+        dtype=weights.dtype,
+        device=weights.device,
+    )
+
+    stacked = weights._get_slice(f"{prefix}")
+    for idx in range(ffn_cfg.moe_num_experts):
+        src = idx * rows_per_expert + shard_id * rows_per_shard
+        dst = idx * rows_per_shard
+        chunk = stacked[src : src + rows_per_shard].to(dtype=weights.dtype)
+        packed[dst : dst + rows_per_shard] = chunk.to(device=weights.device)
+    return packed
+
+
+def _load_experts_quantized(config, prefix, weights, cls):
+    """Load one tensor-parallel linear layer per expert from the stacked tensor at `prefix`.
+
+    `cls` selects the wrapper: row-parallel experts are transposed first and
+    receive the process group; any other class is built from the linear alone.
+    """
+    # Reject unsupported schemes up front, before any checkpoint access, so this
+    # message cannot be masked by a lookup error on the unquantized tensor name.
+    if config.quantize in ["gptq", "awq"]:
+        raise NotImplementedError(
+            "Dbrx does not support gptq/awq quantization yet."
+        )
+
+    world_size = weights.process_group.size()
+    rank = weights.process_group.rank()
+
+    assert (
+        config.ffn_config.ffn_hidden_size % world_size == 0
+    ), f"The chosen size {config.ffn_config.ffn_hidden_size} is not compatible with sharding on {world_size} shards"
+
+    expert_size = config.ffn_config.ffn_hidden_size
+    block_size = expert_size // world_size
+    start = rank * block_size
+    stop = (rank + 1) * block_size
+
+    slice_ = weights._get_slice(f"{prefix}")
+    experts = []
+    for i in range(config.ffn_config.moe_num_experts):
+        offset = i * expert_size
+        expert_slice = slice_[start + offset : stop + offset]
+        expert_slice = expert_slice.to(dtype=weights.dtype).to(device=weights.device)
+        if cls == TensorParallelRowLinear:
+            linear = get_linear(expert_slice.t().contiguous(), None, config.quantize)
+            experts.append(cls(linear, weights.process_group))
+        else:
+            linear = get_linear(expert_slice, None, config.quantize)
+            experts.append(cls(linear))
+    return experts
+
+
+class DbrxAttention(torch.nn.Module):
+    """Self-attention over a fused QKV projection, with rotary embeddings and a paged KV cache."""
+    def __init__(self, prefix: str, config, weights):
+        super().__init__()
+        self.clip_qkv = config.attn_config.clip_qkv
+        self.num_heads = config.n_heads
+        self.hidden_size = config.d_model
+        self.head_size = self.hidden_size // self.num_heads
+
+        self.rotary_emb = PositionRotaryEmbedding.static(
+            config=config,
+            dim=self.head_size,
+            base=config.attn_config.rope_theta,
+            device=weights.device,
+        )
+
+        self.softmax_scale = self.head_size**-0.5
+
+        world_size = weights.process_group.size()
+        if self.num_heads % world_size != 0:
+            raise ValueError(
+                f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
+                f"and `num_shards`: {world_size})"
+            )
+        # Fail clearly here; truncation to 0 would otherwise raise ZeroDivisionError at num_groups.
+        if config.attn_config.kv_n_heads % world_size != 0:
+            raise ValueError(
+                f"`kv_n_heads` must be divisible by `num_shards` (got `kv_n_heads`: "
+                f"{config.attn_config.kv_n_heads} and `num_shards`: {world_size})"
+            )
+        self.num_heads = self.num_heads // world_size
+        self.num_key_value_heads = config.attn_config.kv_n_heads // world_size
+
+        self.query_key_value = load_attention(config, prefix, weights)
+        self.o_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.out_proj",
+            weights=weights,
+            bias=False,
+        )
+        self.num_groups = self.num_heads // self.num_key_value_heads
+        self.kv_head_mapping = torch.arange(
+            0, self.num_key_value_heads, dtype=torch.int32, device=weights.device
+        ).repeat_interleave(self.num_groups)
+
+    def forward(
+        self,
+        hidden_states,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        qkv = self.query_key_value(hidden_states)
+        if self.clip_qkv is not None:
+            qkv = qkv.clamp(min=-self.clip_qkv, max=self.clip_qkv)
+        # Split the fused projection into this rank's queries and packed keys/values.
+        query, kv = qkv.split(
+            [
+                self.head_size * self.num_heads,
+                2 * self.head_size * self.num_key_value_heads,
+            ],
+            dim=1,
+        )
+        query = query.view(-1, self.num_heads, self.head_size)
+        kv = kv.view(-1, 2, self.num_key_value_heads, self.head_size)
+        # Rotate queries and keys (kv index 0); return value unused, so presumably in place.
+        self.rotary_emb(query, torch.select(kv, dim=1, index=0), cos, sin)
+
+        paged_attention.reshape_and_cache(
+            kv[:, 0], kv[:, 1], kv_cache[0], kv_cache[1], slots
+        )
+
+        # output tensor
+        attn_output = torch.empty_like(query)
+
+        # Prefill
+        if cu_seqlen_prefill is not None:
+            # flash attention
+            flash_attn.attention(
+                query,
+                torch.select(kv, dim=1, index=0),
+                torch.select(kv, dim=1, index=1),
+                attn_output,
+                cu_seqlen_prefill,
+                max_s,
+                self.softmax_scale,
+            )
+        # Decode
+        else:
+            paged_attention.attention(
+                attn_output,
+                query,
+                kv_cache[0],
+                kv_cache[1],
+                self.kv_head_mapping,
+                self.softmax_scale,
+                block_tables,
+                input_lengths,
+                max_s,
+            )
+
+        return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size))
+
+
+class DbrxNormAttentionNorm(nn.Module):
+    """Pre-attention norm, self-attention and post-attention norm of one Dbrx block.
+
+    Both norms are `FastLayerNorm` modules loaded without bias (eps=1e-5).
+    """
+
+    def __init__(self, prefix: str, config, weights):
+        super().__init__()
+        self.norm_1 = FastLayerNorm.load_no_bias(
+            prefix=f"{prefix}.norm_1", weights=weights, eps=1e-5
+        )
+        self.self_attn = DbrxAttention(
+            prefix=f"{prefix}.attn", config=config, weights=weights
+        )
+        self.norm_2 = FastLayerNorm.load_no_bias(
+            prefix=f"{prefix}.norm_2", weights=weights, eps=1e-5
+        )
+
+    def forward(
+        self,
+        hidden_states,
+        residual,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        """Return the `(normed_output, residual)` pair produced by the second norm."""
+        # Pre-attention norm; it also hands back the residual to carry forward.
+        pre_normed, carried = self.norm_1(hidden_states, residual)
+
+        # Self attention
+        attended = self.self_attn(
+            pre_normed,
+            cos,
+            sin,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+
+        # Post-attention layer norm (a FastLayerNorm, not an RMS norm).
+        post_normed, next_residual = self.norm_2(attended, carried)
+
+        return post_normed, next_residual
+
+
+@torch.jit.script
+def select_experts(  # top-k routing: softmax over experts, keep the k largest, optionally renormalise
+    gate_logits: torch.Tensor, top_k: int, moe_normalize_expert_weights: int
+):
+    # all_probs: (sequence_length, n_experts) and upcast for softmax
+    all_probs = torch.nn.functional.softmax(gate_logits, dim=1, dtype=torch.float)
+    # weights, selected_experts: (sequence_length, top-k)
+    weights, selected_experts = torch.topk(all_probs, top_k, dim=-1)
+    if moe_normalize_expert_weights:  # 0 skips this; any other value is used as the norm order p
+        weights = weights / torch.norm(
+            weights, p=moe_normalize_expert_weights, dim=-1, keepdim=True
+        )
+    weights = weights.view(-1)  # both outputs are flattened to 1-D
+    selected_experts = selected_experts.view(-1)
+    # Note the return order: expert indices first, then their weights.
+    return selected_experts, weights
+
+
+@torch.jit.script
+def round_up(x: torch.Tensor, value: int):  # next multiple of `value` at or above x, for non-negative x
+    return torch.div(x + (value - 1), value, rounding_mode="trunc") * value  # trunc rounds toward zero
+
+
+class BlockSparseMoE(nn.Module):  # MoE feed-forward evaluated through the imported fused_moe kernel
+    def __init__(self, prefix, config: DbrxConfig, weights):
+        super().__init__()
+        self.moe_normalize_expert_weights = (
+            config.ffn_config.moe_normalize_expert_weights
+        )
+        self.hidden_dim = config.d_model
+        self.ffn_dim = config.ffn_config.ffn_hidden_size // weights.process_group.size()
+        self.num_experts = config.ffn_config.moe_num_experts
+        self.top_k = config.ffn_config.moe_top_k
+        # NOTE(review): self.act is stored below but never read by forward() in this class — confirm.
+        act = config.ffn_config.ffn_act_fn["name"]
+        if "gelu" in act:
+            self.act = lambda x: torch.nn.functional.gelu(
+                x,
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
+            )
+        elif "silu" in act:
+            self.act = torch.nn.functional.silu
+        else:
+            self.act = ACT2FN[act]
+
+        # gating: bias-free linear producing one logit per expert
+        self.gate = FastLinear.load(
+            config, f"{prefix}.router.layer", weights, bias=False
+        )
+
+        # merged expert weights: each loads as (n_experts * ffn_dim, hidden_dim) before the view
+        w1 = _load_experts(config, f"{prefix}.experts.mlp.w1", weights).view(
+            self.num_experts, self.ffn_dim, self.hidden_dim
+        )
+        v1 = _load_experts(config, f"{prefix}.experts.mlp.v1", weights).view(
+            self.num_experts, self.ffn_dim, self.hidden_dim
+        )
+        self.wv1 = torch.cat([w1, v1], dim=1)  # (n_experts, 2 * ffn_dim, hidden_dim)
+        self.w2 = (  # ends up as (n_experts, hidden_dim, ffn_dim) after the transpose
+            _load_experts(config, f"{prefix}.experts.mlp.w2", weights)
+            .view(self.num_experts, self.ffn_dim, self.hidden_dim)
+            .transpose(1, 2)
+            .contiguous()
+        )
+
+        self.process_group = weights.process_group
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # router_logits: (num_tokens, n_experts)
+        router_logits = self.gate(x)
+        out = fused_moe(
+            x,
+            self.wv1,
+            self.w2,
+            router_logits,
+            self.top_k,
+            renormalize=self.moe_normalize_expert_weights,  # passed as a flag here; DenseMoE uses it as norm order p
+            inplace=True,  # NOTE(review): presumably lets the kernel reuse x's storage — confirm
+        )
+
+        # Reduce sum: each rank holds only its ffn_dim slice of every expert
+        if self.process_group.size() > 1:
+            torch.distributed.all_reduce(out, group=self.process_group)
+
+        return out.view(*x.shape)
+
+
+class DenseMoE(nn.Module):  # MoE evaluated expert-by-expert: every expert runs on every token
+    def __init__(self, prefix, config: DbrxConfig, weights):
+        super().__init__()
+
+        self.moe_normalize_expert_weights = (
+            config.ffn_config.moe_normalize_expert_weights
+        )
+        self.hidden_dim = config.d_model
+        self.ffn_dim = config.ffn_config.ffn_hidden_size // weights.process_group.size()
+        self.num_experts = config.ffn_config.moe_num_experts
+        self.top_k = config.ffn_config.moe_top_k
+        # Activation by name: GELU (tanh approximation for two names), SiLU, else ACT2FN lookup.
+        act = config.ffn_config.ffn_act_fn["name"]
+        if "gelu" in act:
+            self.act = lambda x: torch.nn.functional.gelu(
+                x,
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
+            )
+        elif "silu" in act:
+            self.act = torch.nn.functional.silu
+        else:
+            self.act = ACT2FN[act]
+
+        # gating: bias-free linear producing one logit per expert
+        self.gate = FastLinear.load(
+            config, f"{prefix}.router.layer", weights, bias=False
+        )
+        # w1, w2 and v1 are Python lists holding one linear layer per expert.
+        self.w1 = _load_experts_quantized(
+            config,
+            prefix=f"{prefix}.experts.mlp.w1",
+            weights=weights,
+            cls=TensorParallelColumnLinear,
+        )
+        self.w2 = _load_experts_quantized(
+            config,
+            prefix=f"{prefix}.experts.mlp.w2",
+            weights=weights,
+            cls=TensorParallelRowLinear,
+        )
+        self.v1 = _load_experts_quantized(
+            config,
+            prefix=f"{prefix}.experts.mlp.v1",
+            weights=weights,
+            cls=TensorParallelColumnLinear,
+        )
+
+        self.process_group = weights.process_group
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        x: (sequence_length, model_dim)
+        gate_logits: (sequence_length, n_experts)
+        """
+        # flatten any leading dimensions to (num_tokens, model_dim)
+        input_shape = x.shape
+        x = x.view(-1, input_shape[-1])
+
+        # gate_logits: (sequence_length, n_experts)
+        gate_logits = self.gate(x)
+        # weights: (sequence_length, n_experts) routing probabilities, upcast to float for softmax
+        weights = torch.nn.functional.softmax(gate_logits, dim=1, dtype=torch.float)
+
+        if self.top_k < self.num_experts:
+            _, not_selected_experts = torch.topk(  # indices of the (n_experts - top_k) smallest weights
+                weights,
+                self.num_experts - self.top_k,
+                largest=False,
+                sorted=False,
+                dim=1,
+            )
+            # Zero the routing weight of every expert outside the top-k (in place)
+            weights.scatter_(1, not_selected_experts, 0)
+
+        # Re-normalize by the p-norm over experts, with p = moe_normalize_expert_weights
+        if self.moe_normalize_expert_weights:
+            weights = weights / torch.norm(
+                weights, p=self.moe_normalize_expert_weights, dim=-1, keepdim=True
+            )
+        weights = weights.to(x.dtype)
+
+        # Final output tensor
+        out = x.new_zeros(x.shape[0], self.hidden_dim)
+        for i in range(self.num_experts):
+            h = self.act(self.w1[i](x)) * self.v1[i](x)  # gated unit: act(w1 x) * (v1 x)
+            h = self.w2[i](h, reduce=False)  # presumably defers the reduction to the single all_reduce below
+            # Add expert output to out with masking (masked experts have weight 0)
+            out += h * weights[:, i].view(-1, 1)
+
+        # Reduce sum
+        if self.process_group.size() > 1:
+            torch.distributed.all_reduce(out, group=self.process_group)
+
+        return out  # (num_tokens, hidden_dim); not reshaped back to input_shape
+
+
+class DbrxLayer(nn.Module):
+    def __init__(self, layer_id, config, weights):
+        super().__init__()
+        block_prefix = f"transformer.blocks.{layer_id}"
+        # Norm -> attention -> norm sub-block.
+        self.attn = DbrxNormAttentionNorm(
+            prefix=f"{block_prefix}.norm_attn_norm", config=config, weights=weights
+        )
+        # Unquantized weights use BlockSparseMoE; any quantize setting selects DenseMoE.
+        moe_impl = DenseMoE if config.quantize is not None else BlockSparseMoE
+        self.moe = moe_impl(f"{block_prefix}.ffn", config, weights)
+
+    def forward(
+        self,
+        hidden_states,
+        residual,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        # The attention sub-block returns its normed output and the residual stream.
+        normed, residual_out = self.attn(
+            hidden_states,
+            residual,
+            cos,
+            sin,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+
+        ffn_out = self.moe(normed)
+
+        return ffn_out, residual_out
+
+
+class DbrxModel(torch.nn.Module):
+    """Token embedding, `config.n_layers` Dbrx layers and a final layer norm."""
+
+    def __init__(self, config, weights):
+        super().__init__()
+
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix="transformer.wte", weights=weights
+        )
+
+        self.layers = nn.ModuleList(
+            [DbrxLayer(idx, config, weights) for idx in range(config.n_layers)]
+        )
+        self.norm = FastLayerNorm.load_no_bias(
+            prefix="transformer.norm_f", weights=weights, eps=1e-5
+        )
+
+        # Expose the attention geometry of the first layer on the model itself.
+        first_attn = self.layers[0].attn.self_attn
+        self.head_size = first_attn.head_size
+        self.num_heads = first_attn.num_heads
+        self.num_key_value_heads = first_attn.num_key_value_heads
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+    ) -> torch.Tensor:
+        """Embed `input_ids`, run every layer and apply the final norm.
+
+        Layer `i` receives `kv_cache[i]`; all other arguments are shared.
+        """
+        hidden_states = self.embed_tokens(input_ids)
+
+        # Compute the rotary cos/sin once here (via the first layer's rotary
+        # module) instead of indexing them again inside every layer.
+        rotary = self.layers[0].attn.self_attn.rotary_emb
+        cos, sin = rotary.get_cos_sin(position_ids, max_s, hidden_states.dtype)
+
+        residual = None
+        for layer_idx, layer in enumerate(self.layers):
+            hidden_states, residual = layer(
+                hidden_states,
+                residual,
+                cos,
+                sin,
+                cu_seqlen_prefill,
+                kv_cache[layer_idx],
+                block_tables,
+                slots,
+                input_lengths,
+                max_s,
+            )
+
+        # Final norm; the residual it returns is not needed past this point.
+        normed, _ = self.norm(hidden_states, residual)
+        return normed
+
+
+class FlashDbrxForCausalLM(torch.nn.Module):
+    """`DbrxModel` backbone followed by a `SpeculativeHead` loaded from the "lm_head" prefix."""
+
+    def __init__(self, config, weights):
+        super().__init__()
+
+        self.model = DbrxModel(config, weights)
+        self.lm_head = SpeculativeHead.load(config, prefix="lm_head", weights=weights)
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        lm_head_indices: Optional[torch.Tensor] = None,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+        """Run the backbone and return the head's `(logits, speculative_logits)` pair."""
+        hidden = self.model(
+            input_ids,
+            position_ids,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+        # Keep only the rows selected by lm_head_indices, when given.
+        if lm_head_indices is not None:
+            hidden = hidden[lm_head_indices]
+        logits, spec_logits = self.lm_head(hidden)
+        return logits, spec_logits
diff --git a/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py
new file mode 100644
index 00000000..bd7596db
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/flash_gemma_modeling.py
@@ -0,0 +1,459 @@
+# coding=utf-8
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch.distributed
+
+from torch import nn
+from transformers.activations import ACT2FN
+from transformers.configuration_utils import PretrainedConfig
+from typing import Optional, List, Tuple
+
+from text_generation_server.utils import paged_attention, flash_attn
+from text_generation_server.utils.layers import (
+    TensorParallelRowLinear,
+    TensorParallelColumnLinear,
+    TensorParallelEmbedding,
+    PositionRotaryEmbedding,
+    SpeculativeHead,
+    get_linear,
+    FastRMSNorm,
+)
+
+
+class GemmaConfig(PretrainedConfig):
+    def __init__(
+        self,
+        vocab_size=256128,
+        hidden_size=3072,
+        intermediate_size=24576,
+        num_hidden_layers=28,
+        num_attention_heads=16,
+        num_key_value_heads=16,
+        head_dim=256,  # explicit; not derived from hidden_size / num_attention_heads
+        hidden_act="gelu_pytorch_tanh",
+        max_position_embeddings=8192,
+        initializer_range=0.02,
+        rms_norm_eps=1e-6,
+        use_cache=True,
+        pad_token_id=None,
+        bos_token_id=1,
+        eos_token_id=2,
+        tie_word_embeddings=True,
+        rope_theta=10000.0,
+        rope_scaling=None,
+        attention_bias=False,
+        attention_dropout=0.0,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.hidden_size = hidden_size
+        self.head_dim = head_dim
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+
+        # for backward compatibility: None means "one KV head per attention head"
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+
+        self.num_key_value_heads = num_key_value_heads
+        self.hidden_act = hidden_act
+        self.initializer_range = initializer_range
+        self.rms_norm_eps = rms_norm_eps
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.rope_scaling = rope_scaling
+        self.attention_bias = attention_bias
+        self.attention_dropout = attention_dropout
+        # Token ids, weight tying and any extra kwargs are forwarded to PretrainedConfig.
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+
+class GemmaFastRMSNorm(FastRMSNorm):
+    @classmethod
+    def load(cls, prefix, weights, eps=1e-6):
+        weight = weights.get_tensor(f"{prefix}.weight") + 1  # stored weight is shifted by +1 (presumably checkpoints hold scale - 1; confirm)
+        return cls(weight, eps)
+
+    # perform the multiplication in full precision and downcast after
+    def forward(self, hidden_states, residual=None):
+        if residual is not None:
+            hidden_states += residual  # in-place add: mutates the tensor passed in
+        residual = hidden_states  # returned residual is the pre-norm sum
+        hidden_states = hidden_states.to(torch.float32)
+        variance = hidden_states.pow(2).mean(-1, keepdim=True)  # mean of squares over the last dim
+        hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
+        hidden_states = hidden_states * self.weight
+        return hidden_states.to(self.weight.dtype), residual  # (normed output, residual)
+
+
+def load_attention(config, prefix, weights):
+    if config.num_attention_heads != config.num_key_value_heads:  # head counts differ
+        return _load_gqa(config, prefix, weights)
+    else:  # equal head counts: load q/k/v as one fused column-parallel linear
+        return TensorParallelColumnLinear.load_multi(
+            config,
+            prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
+            dim=0,
+            weights=weights,
+            bias=False,
+        )
+
+
+def _load_gqa(config, prefix: str, weights):
+    assert config.num_attention_heads % weights.process_group.size() == 0  # only query heads are checked here
+    # Load the q/k/v projections as a single weight via get_multi_weights_col.
+    weight = weights.get_multi_weights_col(
+        prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
+        quantize=config.quantize,
+        dim=0,
+    )
+
+    if config.quantize not in ["gptq", "awq"]:  # the shape check below is skipped for gptq/awq
+        weight = weight.to(dtype=weights.dtype).to(device=weights.device)
+        # Per-shard head counts; the fused weight must have (q + k + v heads) * head_size rows.
+        head_size = config.head_dim
+        num_heads = config.num_attention_heads // weights.process_group.size()
+        num_key_value_heads = config.num_key_value_heads // weights.process_group.size()
+        assert list(weight.shape) == [
+            (num_heads + 2 * num_key_value_heads) * head_size,
+            config.hidden_size,
+        ], f"{list(weight.shape)} != {[(num_heads + 2 * num_key_value_heads) * head_size, config.hidden_size]}"
+
+    return TensorParallelColumnLinear(
+        get_linear(weight, bias=None, quantize=config.quantize)
+    )
+
+
+class FlashGemmaAttention(torch.nn.Module):
+    def __init__(
+        self,
+        prefix: str,
+        config,
+        weights,
+    ):
+        super().__init__()
+        self.num_heads = config.num_attention_heads
+        self.head_size = config.head_dim
+
+        self.rotary_emb = PositionRotaryEmbedding.static(
+            config=config,
+            dim=self.head_size,
+            base=config.rope_theta,
+            device=weights.device,
+        )
+
+        self.softmax_scale = self.head_size**-0.5  # 1 / sqrt(head_size)
+
+        if self.num_heads % weights.process_group.size() != 0:
+            raise ValueError(
+                f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
+                f"and `num_shards`: {weights.process_group.size()})"
+            )
+        self.num_heads = self.num_heads // weights.process_group.size()  # per-shard from here on
+        self.num_key_value_heads = (
+            config.num_key_value_heads // weights.process_group.size()  # NOTE(review): 0 when KV heads < shards
+        )
+
+        self.query_key_value = load_attention(config, prefix, weights)
+
+        self.o_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.o_proj",
+            weights=weights,
+            bias=False,
+        )
+        self.num_groups = self.num_heads // self.num_key_value_heads  # divides by zero if the count above is 0
+        self.kv_head_mapping = torch.arange(
+            0, self.num_key_value_heads, dtype=torch.int32, device=weights.device
+        ).repeat_interleave(self.num_groups)  # one KV-head index per query head
+
+    def forward(
+        self,
+        hidden_states,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        qkv = self.query_key_value(hidden_states)
+        query, kv = qkv.split(
+            [
+                self.head_size * self.num_heads,
+                2 * self.head_size * self.num_key_value_heads,
+            ],
+            dim=1,
+        )
+        query = query.view(-1, self.num_heads, self.head_size)
+        kv = kv.view(-1, 2, self.num_key_value_heads, self.head_size)  # index 0 / 1 on dim 1 are used as key / value below
+        # Return value is unused: presumably rotates query and key in place (confirm).
+        self.rotary_emb(query, torch.select(kv, dim=1, index=0), cos, sin)
+        # Write this step's key/value into the cache at `slots`.
+        paged_attention.reshape_and_cache(
+            kv[:, 0], kv[:, 1], kv_cache[0], kv_cache[1], slots
+        )
+
+        # output tensor
+        attn_output = torch.empty_like(query)
+
+        # Prefill
+        if cu_seqlen_prefill is not None:
+            # flash attention
+            flash_attn.attention(
+                query,
+                torch.select(kv, dim=1, index=0),
+                torch.select(kv, dim=1, index=1),
+                attn_output,
+                cu_seqlen_prefill,
+                max_s,
+                self.softmax_scale,
+            )
+        # Decode
+        else:
+            paged_attention.attention(
+                attn_output,
+                query,
+                kv_cache[0],
+                kv_cache[1],
+                self.kv_head_mapping,
+                self.softmax_scale,
+                block_tables,
+                input_lengths,
+                max_s,
+            )
+        # Flatten heads back into one feature dim before the output projection.
+        return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size))
+
+
+class GemmaMLP(nn.Module):
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        act = config.hidden_act
+        self.act = (
+            ACT2FN[act]  # non-gelu activations come from the transformers table
+            if "gelu" not in act
+            else lambda x: torch.nn.functional.gelu(
+                x,
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
+            )
+        )
+        # Fuse gate and up proj
+        self.gate_up_proj = TensorParallelColumnLinear.load_multi(
+            config,
+            prefixes=[f"{prefix}.gate_proj", f"{prefix}.up_proj"],
+            weights=weights,
+            dim=0,
+            bias=False,
+        )
+        self.down_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.down_proj",
+            weights=weights,
+            bias=False,
+        )
+        self.intermediate_size = (
+            config.intermediate_size // weights.process_group.size()  # per-shard width
+        )
+
+    def forward(self, hidden_states):
+        gate_up_states = self.gate_up_proj(hidden_states)
+        gate_up_states = gate_up_states.view(-1, 2, self.intermediate_size)  # [:, 0] = gate, [:, 1] = up
+        return self.down_proj(self.act(gate_up_states[:, 0]) * gate_up_states[:, 1])
+
+
+class FlashGemmaLayer(nn.Module):
+    def __init__(self, layer_id, config, weights):
+        super().__init__()
+        prefix = f"model.layers.{layer_id}"  # weight-name prefix shared by all sub-modules
+        self.self_attn = FlashGemmaAttention(
+            prefix=f"{prefix}.self_attn", config=config, weights=weights
+        )
+        self.mlp = GemmaMLP(prefix=f"{prefix}.mlp", config=config, weights=weights)
+
+        self.input_layernorm = GemmaFastRMSNorm.load(
+            prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps
+        )
+        self.post_attention_layernorm = GemmaFastRMSNorm.load(
+            prefix=f"{prefix}.post_attention_layernorm",
+            weights=weights,
+            eps=config.rms_norm_eps,
+        )
+
+    def forward(
+        self,
+        hidden_states,
+        residual,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        normed_hidden_states, res = self.input_layernorm(hidden_states, residual)  # norm also returns the updated residual
+
+        # Self Attention
+        attn_output = self.self_attn(
+            normed_hidden_states,
+            cos,
+            sin,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+
+        # post-attention norm: adds `res` to attn_output, then normalises the sum
+        normed_attn_res_output, attn_res = self.post_attention_layernorm(
+            attn_output, res
+        )
+
+        mlp_output = self.mlp(normed_attn_res_output)
+
+        return mlp_output, attn_res  # the MLP output is not added to the residual here
+
+
+class FlashGemmaModel(torch.nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+
+        process_group = weights.process_group
+        self.tp_rank = process_group.rank()
+        self.tp_world_size = process_group.size()
+        embed_norm = config.hidden_size**0.5  # sqrt(hidden_size)
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix="model.embed_tokens", weights=weights
+        )
+        self.embed_tokens.weight *= embed_norm  # scale the embedding table once, in place, at load time
+
+        self.layers = nn.ModuleList(
+            [
+                FlashGemmaLayer(
+                    layer_id,
+                    config,
+                    weights,
+                )
+                for layer_id in range(config.num_hidden_layers)
+            ]
+        )
+        self.norm = GemmaFastRMSNorm.load(
+            prefix="model.norm", weights=weights, eps=config.rms_norm_eps
+        )
+
+        self.gradient_checkpointing = False
+        # Mirror the first layer's (per-shard) attention geometry on the model.
+        self.head_size = self.layers[0].self_attn.head_size
+        self.num_heads = self.layers[0].self_attn.num_heads
+        self.num_key_value_heads = self.layers[0].self_attn.num_key_value_heads
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+    ) -> torch.Tensor:
+        hidden_states = self.embed_tokens(input_ids)
+
+        # Get rotary cos and sin once for this forward pass
+        # Avoids indexing them again in each layer
+        cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
+            position_ids, max_s, hidden_states.dtype
+        )
+
+        residual = None  # the first layer's input norm receives no residual
+        for i, layer in enumerate(self.layers):
+            hidden_states, residual = layer(
+                hidden_states,
+                residual,
+                cos,
+                sin,
+                cu_seqlen_prefill,
+                kv_cache[i],
+                block_tables,
+                slots,
+                input_lengths,
+                max_s,
+            )
+
+        hidden_states, _ = self.norm(hidden_states, residual)  # final norm; its residual output is discarded
+
+        return hidden_states
+
+
+class FlashGemmaForCausalLM(torch.nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+        # With tied embeddings the head is loaded from the embedding weights' prefix.
+        self.model = FlashGemmaModel(config, weights)
+        self.lm_head = SpeculativeHead.load(
+            config,
+            prefix="model.embed_tokens" if config.tie_word_embeddings else "lm_head",
+            weights=weights,
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        lm_head_indices: Optional[torch.Tensor] = None,
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+        hidden_states = self.model(
+            input_ids,
+            position_ids,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+        if lm_head_indices is not None:
+            hidden_states = hidden_states[lm_head_indices]  # keep only the selected rows
+        logits, speculative_logits = self.lm_head(hidden_states)  # head returns a pair
+        return logits, speculative_logits
diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
index 4aeb447d..4cf0fcf2 100644
--- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py
@@ -32,15 +32,10 @@ from text_generation_server.utils.layers import (
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
     PositionRotaryEmbedding,
-    TensorParallelHead,
+    SpeculativeHead,
     get_linear,
+    FastRMSNorm,
 )
-from text_generation_server.utils.import_utils import IS_CUDA_SYSTEM, IS_ROCM_SYSTEM
-
-if IS_CUDA_SYSTEM:
-    import dropout_layer_norm
-elif IS_ROCM_SYSTEM:
-    from vllm import layernorm_ops
 
 
 class LlamaConfig(PretrainedConfig):
@@ -95,75 +90,6 @@ class LlamaConfig(PretrainedConfig):
         )
 
 
-class LlamaRMSNorm(nn.Module):
-    def __init__(self, prefix, weights, eps=1e-6):
-        """
-        LlamaRMSNorm is equivalent to T5LayerNorm
-        """
-        super().__init__()
-
-        weight = weights.get_tensor(f"{prefix}.weight")
-        self.weight = nn.Parameter(weight)
-        self.variance_epsilon = eps
-
-    def forward(self, hidden_states, residual=None):
-        if hidden_states.shape[-1] > 8192:
-            if residual is not None:
-                hidden_states += residual
-            residual = hidden_states
-
-            hidden_states = hidden_states.to(torch.float32)
-            variance = hidden_states.pow(2).mean(-1, keepdim=True)
-            hidden_states = hidden_states * torch.rsqrt(
-                variance + self.variance_epsilon
-            )
-
-            # convert into half-precision if necessary
-            if self.weight.dtype in [torch.float16, torch.bfloat16]:
-                hidden_states = hidden_states.to(self.weight.dtype)
-
-            return self.weight * hidden_states, residual
-        elif IS_CUDA_SYSTEM:
-            # faster post attention rms norm
-            normed_hidden_states, res, *rest = dropout_layer_norm.dropout_add_ln_fwd(
-                hidden_states,
-                residual,
-                self.weight,
-                None,
-                None,
-                None,
-                None,
-                None,
-                0.0,
-                self.variance_epsilon,
-                1.0,
-                0,
-                None,
-                False,
-                True,  # Activate RMSNorm
-            )
-            if res is None:
-                res = hidden_states
-
-            return normed_hidden_states, res
-        elif IS_ROCM_SYSTEM:
-            # We use VLLM RMSNorm kernel that can be compiled for RoCm, instead of Flash Attention ones that can not.
-            if residual is not None:
-                hidden_states += residual
-            residual = hidden_states
-
-            out = torch.empty_like(hidden_states)
-            layernorm_ops.rms_norm(
-                out,
-                hidden_states,
-                self.weight.data,
-                self.variance_epsilon,
-            )
-            return out, residual
-        else:
-            raise ValueError("Your system seem to be not supported. Please check your install or open an issue at https://github.com/huggingface/text-generation-inference/issues with a clear reproduction.")
-
-
 def load_attention(config, prefix, weights):
     if config.num_attention_heads != config.num_key_value_heads:
         return _load_gqa(config, prefix, weights)
@@ -277,7 +203,7 @@ class FlashLlamaAttention(torch.nn.Module):
         )
         query = query.view(-1, self.num_heads, self.head_size)
         kv = kv.view(-1, 2, self.num_key_value_heads, self.head_size)
-        
+
         self.rotary_emb(query, torch.select(kv, dim=1, index=0), cos, sin)
 
         paged_attention.reshape_and_cache(
@@ -312,7 +238,7 @@ class FlashLlamaAttention(torch.nn.Module):
                 input_lengths,
                 max_s,
             )
-        
+
         return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size))
 
 
@@ -325,9 +251,9 @@ class LlamaMLP(nn.Module):
             if "gelu" not in act
             else lambda x: torch.nn.functional.gelu(
                 x,
-                approximate="tanh"
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
-                else "none",
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
             )
         )
         # Fuse gate and up proj
@@ -355,18 +281,17 @@ class LlamaMLP(nn.Module):
 
 
 class FlashLlamaLayer(nn.Module):
-    def __init__(self, layer_id, config, weights):
+    def __init__(self, prefix, config, weights):
         super().__init__()
-        prefix = f"model.layers.{layer_id}"
         self.self_attn = FlashLlamaAttention(
             prefix=f"{prefix}.self_attn", config=config, weights=weights
         )
         self.mlp = LlamaMLP(prefix=f"{prefix}.mlp", config=config, weights=weights)
 
-        self.input_layernorm = LlamaRMSNorm(
+        self.input_layernorm = FastRMSNorm.load(
             prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps
         )
-        self.post_attention_layernorm = LlamaRMSNorm(
+        self.post_attention_layernorm = FastRMSNorm.load(
             prefix=f"{prefix}.post_attention_layernorm",
             weights=weights,
             eps=config.rms_norm_eps,
@@ -411,27 +336,30 @@ class FlashLlamaLayer(nn.Module):
 
 
 class FlashLlamaModel(torch.nn.Module):
-    def __init__(self, config, weights):
+    def __init__(self, prefix, config, weights):
         super().__init__()
 
         process_group = weights.process_group
         self.tp_rank = process_group.rank()
         self.tp_world_size = process_group.size()
-        self.embed_tokens = TensorParallelEmbedding(
-            prefix="model.embed_tokens", weights=weights
-        )
         self.layers = nn.ModuleList(
             [
                 FlashLlamaLayer(
-                    layer_id,
-                    config,
-                    weights,
+                    prefix=(
+                        f"model.layers.{layer_id}"
+                        if not prefix
+                        else f"{prefix}.model.layers.{layer_id}"
+                    ),
+                    config=config,
+                    weights=weights,
                 )
                 for layer_id in range(config.num_hidden_layers)
             ]
         )
-        self.norm = LlamaRMSNorm(
-            prefix="model.norm", weights=weights, eps=config.rms_norm_eps
+        self.norm = FastRMSNorm.load(
+            prefix="model.norm" if not prefix else f"{prefix}.model.norm",
+            weights=weights,
+            eps=config.rms_norm_eps,
         )
 
         self.gradient_checkpointing = False
@@ -442,7 +370,7 @@ class FlashLlamaModel(torch.nn.Module):
 
     def forward(
         self,
-        input_ids: torch.Tensor,
+        inputs_embeds: torch.Tensor,
         position_ids: torch.Tensor,
         cu_seqlen_prefill: Optional[torch.Tensor],
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
@@ -450,8 +378,10 @@ class FlashLlamaModel(torch.nn.Module):
         slots: torch.Tensor,
         input_lengths: torch.Tensor,
         max_s: int,
+        true_max_s: int,
+        prefill_cache_indices: Optional[torch.Tensor],
     ) -> torch.Tensor:
-        hidden_states = self.embed_tokens(input_ids)
+        hidden_states = inputs_embeds
 
         # Get rotary cos and sin for this forward
         # Avoid to index in each layer
@@ -480,13 +410,19 @@ class FlashLlamaModel(torch.nn.Module):
 
 
 class FlashLlamaForCausalLM(torch.nn.Module):
-    def __init__(self, config, weights):
+    def __init__(self, prefix, config, weights):
         super().__init__()
 
-        self.model = FlashLlamaModel(config, weights)
-        self.lm_head = TensorParallelHead.load(
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix=(
+                "model.embed_tokens" if not prefix else f"{prefix}.model.embed_tokens"
+            ),
+            weights=weights,
+        )
+        self.model = FlashLlamaModel(prefix, config, weights)
+        self.lm_head = SpeculativeHead.load(
             config,
-            prefix="lm_head",
+            prefix="lm_head" if not prefix else f"{prefix}.lm_head",
             weights=weights,
         )
 
@@ -500,10 +436,12 @@ class FlashLlamaForCausalLM(torch.nn.Module):
         slots: torch.Tensor,
         input_lengths: torch.Tensor,
         max_s: int,
+        prefill_cache_indices: Optional[torch.Tensor] = None,
         lm_head_indices: Optional[torch.Tensor] = None,
-    ) -> torch.Tensor:
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+        inputs_embeds = self.embed_tokens(input_ids)
         hidden_states = self.model(
-            input_ids,
+            inputs_embeds,
             position_ids,
             cu_seqlen_prefill,
             kv_cache,
@@ -511,8 +449,10 @@ class FlashLlamaForCausalLM(torch.nn.Module):
             slots,
             input_lengths,
             max_s,
+            true_max_s=max_s,
+            prefill_cache_indices=prefill_cache_indices,
         )
         if lm_head_indices is not None:
             hidden_states = hidden_states[lm_head_indices]
-        logits = self.lm_head(hidden_states)
-        return logits
+        logits, speculative_logits = self.lm_head(hidden_states)
+        return logits, speculative_logits
diff --git a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
index 959949f0..ffaa0c32 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
@@ -27,24 +27,15 @@ from transformers.configuration_utils import PretrainedConfig
 from typing import Optional, List, Tuple
 
 from text_generation_server.utils import paged_attention, flash_attn
-from text_generation_server.utils.flash_attn import attention, HAS_FLASH_ATTN_V2_ROCM, HAS_FLASH_ATTN_V2_CUDA
 from text_generation_server.utils.layers import (
     TensorParallelRowLinear,
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
     PositionRotaryEmbedding,
-    TensorParallelHead,
+    SpeculativeHead,
     get_linear,
+    FastRMSNorm,
 )
-from text_generation_server.utils.import_utils import IS_CUDA_SYSTEM, IS_ROCM_SYSTEM
-
-if IS_CUDA_SYSTEM:
-    import dropout_layer_norm
-elif IS_ROCM_SYSTEM:
-    from vllm import layernorm_ops
-
-if not HAS_FLASH_ATTN_V2_CUDA and not HAS_FLASH_ATTN_V2_ROCM:
-    raise ImportError("Mistral model requires flash attn v2")
 
 
 class MistralConfig(PretrainedConfig):
@@ -69,7 +60,7 @@ class MistralConfig(PretrainedConfig):
         pretraining_tp=1,
         tie_word_embeddings=False,
         rope_theta=10000.0,
-        sliding_window=4096,
+        sliding_window=None,
         **kwargs,
     ):
         self.vocab_size = vocab_size
@@ -101,75 +92,6 @@ class MistralConfig(PretrainedConfig):
         )
 
 
-class MistralRMSNorm(nn.Module):
-    def __init__(self, prefix, weights, eps=1e-6):
-        """
-        LlamaRMSNorm is equivalent to T5LayerNorm
-        """
-        super().__init__()
-
-        weight = weights.get_tensor(f"{prefix}.weight")
-        self.weight = nn.Parameter(weight)
-        self.variance_epsilon = eps
-
-    def forward(self, hidden_states, residual=None):
-        if hidden_states.shape[-1] > 8192:
-            if residual is not None:
-                hidden_states += residual
-            residual = hidden_states
-
-            hidden_states = hidden_states.to(torch.float32)
-            variance = hidden_states.pow(2).mean(-1, keepdim=True)
-            hidden_states = hidden_states * torch.rsqrt(
-                variance + self.variance_epsilon
-            )
-
-            # convert into half-precision if necessary
-            if self.weight.dtype in [torch.float16, torch.bfloat16]:
-                hidden_states = hidden_states.to(self.weight.dtype)
-
-            return self.weight * hidden_states, residual
-        elif IS_CUDA_SYSTEM:
-            # faster post attention rms norm
-            normed_hidden_states, res, *rest = dropout_layer_norm.dropout_add_ln_fwd(
-                hidden_states,
-                residual,
-                self.weight,
-                None,
-                None,
-                None,
-                None,
-                None,
-                0.0,
-                self.variance_epsilon,
-                1.0,
-                0,
-                None,
-                False,
-                True,  # Activate RMSNorm
-            )
-            if res is None:
-                res = hidden_states
-
-            return normed_hidden_states, res
-        elif IS_ROCM_SYSTEM:
-            # We use VLLM RMSNorm kernel that can be compiled for RoCm, instead of Flash Attention ones that can not.
-            if residual is not None:
-                hidden_states += residual
-            residual = hidden_states
-
-            out = torch.empty_like(hidden_states)
-            layernorm_ops.rms_norm(
-                out,
-                hidden_states,
-                self.weight.data,
-                self.variance_epsilon,
-            )
-            return out, residual
-        else:
-            raise ValueError("Your system seem to be not supported. Please check your install or open an issue at https://github.com/huggingface/text-generation-inference/issues with a clear reproduction.")
-
-
 def load_attention(config, prefix, weights):
     if config.num_attention_heads != config.num_key_value_heads:
         return _load_gqa(config, prefix, weights)
@@ -218,7 +140,7 @@ class MistralAttention(torch.nn.Module):
     ):
         super().__init__()
         self.max_past = (
-            config.sliding_window if config.sliding_window is not None else 0
+            config.sliding_window if config.sliding_window is not None else -1
         )
         self.num_heads = config.num_attention_heads
         self.hidden_size = config.hidden_size
@@ -333,9 +255,9 @@ class MistralMLP(nn.Module):
             if "gelu" not in act
             else lambda x: torch.nn.functional.gelu(
                 x,
-                approximate="tanh"
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
-                else "none",
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
             )
         )
         # Fuse gate and up proj
@@ -363,18 +285,17 @@ class MistralMLP(nn.Module):
 
 
 class MistralLayer(nn.Module):
-    def __init__(self, layer_id, config, weights):
+    def __init__(self, prefix, config, weights):
         super().__init__()
-        prefix = f"model.layers.{layer_id}"
         self.self_attn = MistralAttention(
             prefix=f"{prefix}.self_attn", config=config, weights=weights
         )
         self.mlp = MistralMLP(prefix=f"{prefix}.mlp", config=config, weights=weights)
 
-        self.input_layernorm = MistralRMSNorm(
+        self.input_layernorm = FastRMSNorm.load(
             prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps
         )
-        self.post_attention_layernorm = MistralRMSNorm(
+        self.post_attention_layernorm = FastRMSNorm.load(
             prefix=f"{prefix}.post_attention_layernorm",
             weights=weights,
             eps=config.rms_norm_eps,
@@ -421,27 +342,24 @@ class MistralLayer(nn.Module):
 
 
 class MistralModel(torch.nn.Module):
-    def __init__(self, config, weights):
+    def __init__(self, prefix, config, weights):
         super().__init__()
 
         process_group = weights.process_group
         self.tp_rank = process_group.rank()
         self.tp_world_size = process_group.size()
-        self.embed_tokens = TensorParallelEmbedding(
-            prefix="model.embed_tokens", weights=weights
-        )
         self.layers = nn.ModuleList(
             [
                 MistralLayer(
-                    layer_id,
-                    config,
-                    weights,
+                    prefix=f"{prefix}.layers.{layer_id}",
+                    config=config,
+                    weights=weights,
                 )
                 for layer_id in range(config.num_hidden_layers)
             ]
         )
-        self.norm = MistralRMSNorm(
-            prefix="model.norm", weights=weights, eps=config.rms_norm_eps
+        self.norm = FastRMSNorm.load(
+            prefix=f"{prefix}.norm", weights=weights, eps=config.rms_norm_eps
         )
 
         self.gradient_checkpointing = False
@@ -452,7 +370,7 @@ class MistralModel(torch.nn.Module):
 
     def forward(
         self,
-        input_ids: torch.Tensor,
+        inputs_embeds: torch.Tensor,
         position_ids: torch.Tensor,
         cu_seqlen_prefill: Optional[torch.Tensor],
         kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
@@ -460,14 +378,14 @@ class MistralModel(torch.nn.Module):
         slots: torch.Tensor,
         input_lengths: torch.Tensor,
         max_s: int,
+        true_max_s: int,
         prefill_cache_indices: Optional[torch.Tensor],
-    ) -> torch.Tensor:
-        hidden_states = self.embed_tokens(input_ids)
-
+    ):
+        hidden_states = inputs_embeds
         # Get rotary cos and sin for this forward
         # Avoid to index in each layer
         cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
-            position_ids, max_s, hidden_states.dtype
+            position_ids, true_max_s, hidden_states.dtype
         )
 
         residual = None
@@ -487,23 +405,35 @@ class MistralModel(torch.nn.Module):
             )
 
         hidden_states, _ = self.norm(hidden_states, residual)
-
         return hidden_states
 
 
 class FlashMistralForCausalLM(torch.nn.Module):
-    def __init__(self, config, weights):
+    def __init__(self, prefix, config, weights):
         super().__init__()
 
-        self.model = MistralModel(config, weights)
-        self.lm_head = TensorParallelHead.load(
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix=(
+                "model.embed_tokens" if not prefix else f"{prefix}.model.embed_tokens"
+            ),
+            weights=weights,
+        )
+        self.model = MistralModel(
+            prefix="model" if not prefix else f"{prefix}.model",
+            config=config,
+            weights=weights,
+        )
+        self.lm_head = SpeculativeHead.load(
             config,
-            prefix="lm_head",
+            prefix="lm_head" if not prefix else f"{prefix}.lm_head",
             weights=weights,
         )
         self.max_past = config.sliding_window
-        if self.max_past is None:
-            raise ValueError("max_past cannot be None")
+        self.max_past_tensor = (
+            torch.tensor(config.sliding_window, device=weights.device)
+            if self.max_past is not None
+            else None
+        )
 
     def forward(
         self,
@@ -518,17 +448,18 @@ class FlashMistralForCausalLM(torch.nn.Module):
         prefill_cache_indices: Optional[torch.Tensor],
         lm_head_indices: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
+        true_max_s = max_s
         if prefill_cache_indices is not None:
             # Slots also need to be sliced as it has the same size as the whole kv tensor
             slots = slots[prefill_cache_indices]
-        else:
+        elif self.max_past is not None:
             # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
             # kernel requires the true values
-            max_s = min(self.max_past, max_s)
-            input_lengths = torch.clamp(input_lengths, max=self.max_past)
+            input_lengths = torch.clamp(input_lengths, max=self.max_past_tensor)
 
+        inputs_embeds = self.embed_tokens(input_ids)
         hidden_states = self.model(
-            input_ids,
+            inputs_embeds,
             position_ids,
             cu_seqlen_prefill,
             kv_cache,
@@ -536,6 +467,7 @@ class FlashMistralForCausalLM(torch.nn.Module):
             slots,
             input_lengths,
             max_s,
+            true_max_s,
             prefill_cache_indices,
         )
         if lm_head_indices is not None:
diff --git a/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
new file mode 100644
index 00000000..be8cb965
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
@@ -0,0 +1,655 @@
+# coding=utf-8
+# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch.distributed
+
+import numpy as np
+
+from torch import nn
+from vllm.model_executor.layers.fused_moe import fused_moe
+from transformers.activations import ACT2FN
+from transformers.configuration_utils import PretrainedConfig
+from typing import Optional, List, Tuple
+from loguru import logger
+
+from text_generation_server.utils import paged_attention, flash_attn
+from text_generation_server.utils.layers import (
+    FastLinear,
+    FastRMSNorm,
+    TensorParallelRowLinear,
+    TensorParallelColumnLinear,
+    TensorParallelEmbedding,
+    PositionRotaryEmbedding,
+    SpeculativeHead,
+    get_linear,
+)
+
+
+class MixtralConfig(PretrainedConfig):  # hyper-parameter container for Mixtral checkpoints
+    model_type = "mixtral"
+
+    def __init__(
+        self,
+        vocab_size=32000,
+        hidden_size=4096,
+        intermediate_size=14336,
+        num_hidden_layers=32,
+        num_attention_heads=32,
+        num_key_value_heads=8,
+        hidden_act="silu",
+        max_position_embeddings=4096 * 32,
+        initializer_range=0.02,
+        rms_norm_eps=1e-05,
+        use_cache=True,
+        pad_token_id=None,
+        bos_token_id=1,
+        eos_token_id=2,
+        pretraining_tp=1,
+        tie_word_embeddings=False,
+        rope_theta=10000.0,
+        sliding_window=None,
+        num_experts_per_tok=2,
+        num_local_experts=8,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.sliding_window = sliding_window
+
+        # Backward compatibility: a missing (None) num_key_value_heads falls back to num_attention_heads
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+
+        self.num_key_value_heads = num_key_value_heads
+        self.hidden_act = hidden_act
+        self.initializer_range = initializer_range
+        self.rms_norm_eps = rms_norm_eps
+        self.pretraining_tp = pretraining_tp
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.num_experts_per_tok = num_experts_per_tok
+        self.num_local_experts = num_local_experts
+        # Token ids, weight tying and any remaining kwargs are handed to PretrainedConfig
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+
+def promote_scalar(x: torch.Tensor) -> torch.Tensor:
+    return x if x.dim() != 0 else x.view(1)  # 0-d tensors become shape (1,); anything else is returned as-is
+
+
+def load_attention(config, prefix, weights):
+    """Build the fused query/key/value projection for one attention layer.
+
+    Uses `_load_gqa` when KV heads differ from attention heads, else `load_multi`.
+    """
+    if config.num_attention_heads != config.num_key_value_heads:
+        return _load_gqa(config, prefix, weights)
+    qkv_prefixes = [f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"]
+    return TensorParallelColumnLinear.load_multi(
+        config, prefixes=qkv_prefixes, dim=0, weights=weights, bias=False
+    )
+
+
+def _load_gqa(config, prefix: str, weights):
+    """Load the column-sharded fused q/k/v weight for grouped-query attention."""
+    assert config.hidden_size % config.num_attention_heads == 0
+    assert config.num_attention_heads % weights.process_group.size() == 0
+
+    weight = weights.get_multi_weights_col(
+        prefixes=[f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"],
+        quantize=config.quantize,
+        dim=0,
+    )
+
+    if config.quantize not in ["gptq", "awq"]:
+        weight = weight.to(dtype=weights.dtype).to(device=weights.device)
+        # The shape check only runs on this branch and uses per-shard head counts.
+        head_size = config.hidden_size // config.num_attention_heads
+        num_heads = config.num_attention_heads // weights.process_group.size()
+        num_key_value_heads = config.num_key_value_heads // weights.process_group.size()
+        # Build the expected shape once so the failure message always matches the check.
+        expected = [(num_heads + 2 * num_key_value_heads) * head_size, config.hidden_size]
+        assert list(weight.shape) == expected, f"{list(weight.shape)} != {expected}"
+
+    return TensorParallelColumnLinear(
+        get_linear(weight, bias=None, quantize=config.quantize)
+    )
+
+
+def _load_experts(config, prefix, mat, weights):
+    """Stack this rank's shard of matrix `mat` from every expert into one tensor.
+
+    Returns a (num_local_experts * shard_rows, hidden_size) tensor; `w2` shards are
+    taken along columns and transposed, `w1`/`w3` shards are taken along rows.
+    """
+    if config.quantize is not None:
+        raise NotImplementedError("Mixtral does not support weight quantization yet.")
+
+    assert mat in ["w1", "w2", "w3"]
+
+    group = weights.process_group
+    world_size, rank = group.size(), group.rank()
+    assert (
+        config.intermediate_size % world_size == 0
+    ), f"The chosen size {config.intermediate_size} is not compatible with sharding on {world_size} shards"
+
+    # Each rank owns one contiguous slice of the intermediate dimension.
+    shard_rows = config.intermediate_size // world_size
+    lo, hi = rank * shard_rows, (rank + 1) * shard_rows
+
+    stacked = torch.empty(
+        (config.num_local_experts * shard_rows, config.hidden_size),
+        dtype=weights.dtype,
+        device=weights.device,
+    )
+
+    for i in range(config.num_local_experts):
+        full = weights._get_slice(f"{prefix}.{i}.{mat}.weight")
+        # w2 is sliced on its second dimension, then transposed to (shard_rows, hidden_size).
+        shard = full[:, lo:hi].t().contiguous() if mat == "w2" else full[lo:hi]
+        shard = shard.to(dtype=weights.dtype).to(device=weights.device)
+        stacked[i * shard_rows : (i + 1) * shard_rows] = shard
+    return stacked
+
+
+class MixtralAttention(torch.nn.Module):
+    """Tensor-parallel self-attention with a fused q/k/v projection and rotary embeddings.
+
+    Prefill goes through `flash_attn.attention`, decode through `paged_attention.attention`.
+    """
+
+    def __init__(self, prefix: str, config, weights):
+        super().__init__()
+        self.max_past = (
+            config.sliding_window if config.sliding_window is not None else -1
+        )
+        self.num_heads = config.num_attention_heads
+        self.hidden_size = config.hidden_size
+        self.head_size = self.hidden_size // self.num_heads
+
+        self.rotary_emb = PositionRotaryEmbedding.static(
+            config=config,
+            dim=self.head_size,
+            base=config.rope_theta,
+            device=weights.device,
+        )
+
+        self.softmax_scale = self.head_size**-0.5
+        # Head counts below are per shard: both are divided by the process-group size
+        if self.num_heads % weights.process_group.size() != 0:
+            raise ValueError(
+                f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
+                f"and `num_shards`: {weights.process_group.size()})"
+            )
+        self.num_heads = self.num_heads // weights.process_group.size()
+        self.num_key_value_heads = (
+            config.num_key_value_heads // weights.process_group.size()
+        )
+
+        self.query_key_value = load_attention(config, prefix, weights)
+
+        self.o_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.o_proj",
+            weights=weights,
+            bias=False,
+        )
+        self.num_groups = self.num_heads // self.num_key_value_heads
+        self.kv_head_mapping = torch.arange(
+            0, self.num_key_value_heads, dtype=torch.int32, device=weights.device
+        ).repeat_interleave(self.num_groups)
+
+    def forward(
+        self,
+        hidden_states,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+        prefill_cache_indices,
+    ):
+        qkv = self.query_key_value(hidden_states)
+        query, kv = qkv.split(
+            [
+                self.head_size * self.num_heads,
+                2 * self.head_size * self.num_key_value_heads,
+            ],
+            dim=1,
+        )
+        query = query.view(-1, self.num_heads, self.head_size)
+        kv = kv.view(-1, 2, self.num_key_value_heads, self.head_size)
+        # Rotate `query` and `kv[:, 0]`; the return value is unused, so this presumably happens in place
+        self.rotary_emb(query, torch.select(kv, dim=1, index=0), cos, sin)
+        # Only the rows selected by `prefill_cache_indices` (when given) are written to the cache
+        if prefill_cache_indices is not None:
+            kv_to_cache = kv[prefill_cache_indices]
+        else:
+            kv_to_cache = kv
+
+        paged_attention.reshape_and_cache(
+            kv_to_cache[:, 0], kv_to_cache[:, 1], kv_cache[0], kv_cache[1], slots
+        )
+
+        # Output buffer handed to the attention kernels
+        attn_output = torch.empty_like(query)
+
+        # Prefill: attend over this batch's own q/k/v
+        if cu_seqlen_prefill is not None:
+            # flash attention
+            flash_attn.attention(
+                query,
+                torch.select(kv, dim=1, index=0),
+                torch.select(kv, dim=1, index=1),
+                attn_output,
+                cu_seqlen_prefill,
+                max_s,
+                self.softmax_scale,
+                window_size_left=self.max_past,
+            )
+        # Decode: attend over the paged KV cache addressed by block_tables
+        else:
+            paged_attention.attention(
+                attn_output,
+                query,
+                kv_cache[0],
+                kv_cache[1],
+                self.kv_head_mapping,
+                self.softmax_scale,
+                block_tables,
+                input_lengths,
+                max_s,
+            )
+
+        return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size))
+
+
+@torch.jit.script
+def select_experts(gate_logits: torch.Tensor, top_k: int):
+    # Softmax over dim 1 in float32, so all_probs is (sequence_length, n_experts)
+    all_probs = torch.nn.functional.softmax(gate_logits, dim=1, dtype=torch.float)
+    # Keep the top_k largest probabilities per row: weights and selected_experts are (sequence_length, top_k)
+    weights, selected_experts = torch.topk(all_probs, top_k, dim=-1)
+    weights /= weights.sum(dim=-1, keepdim=True)  # renormalize so each row of kept weights sums to 1
+    weights = weights.view(-1)
+    selected_experts = selected_experts.view(-1)
+    # Both results are returned flattened to 1-D, expert ids first
+    return selected_experts, weights
+
+
+@torch.jit.script
+def round_up(x: torch.Tensor, value: int):  # rounds non-negative integer x up to the next multiple of `value`
+    return torch.div(x + (value - 1), value, rounding_mode="trunc") * value
+
+
+class BlockSparseMoE(nn.Module):  # MoE feed-forward that passes all expert weights to `fused_moe` in one call
+    def __init__(self, prefix, config: MixtralConfig, weights):
+        super().__init__()
+        self.hidden_dim = config.hidden_size
+        self.ffn_dim = config.intermediate_size // weights.process_group.size()  # per-shard slice
+        self.num_experts = config.num_local_experts
+        self.top_k = config.num_experts_per_tok
+        # NOTE(review): `self.act` is set here but never read by forward() in this class; confirm fused_moe applies the intended activation
+        act = config.hidden_act
+        if "gelu" in act:
+            self.act = lambda x: torch.nn.functional.gelu(
+                x,
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
+            )
+        elif "silu" in act:
+            self.act = torch.nn.functional.silu
+        else:
+            self.act = ACT2FN[act]
+
+        # Router: a bias-free linear layer loaded from "<prefix>.gate"
+        self.gate = FastLinear.load(config, f"{prefix}.gate", weights, bias=False)
+
+        # _load_experts returns (n_experts * ffn_dim, hidden_dim); each is viewed as (n_experts, ffn_dim, hidden_dim)
+        w1 = _load_experts(config, f"{prefix}.experts", "w1", weights).view(
+            self.num_experts, self.ffn_dim, self.hidden_dim
+        )
+        w3 = _load_experts(config, f"{prefix}.experts", "w3", weights).view(
+            self.num_experts, self.ffn_dim, self.hidden_dim
+        )
+        self.w13 = torch.cat([w1, w3], dim=1)  # (n_experts, 2 * ffn_dim, hidden_dim)
+        self.w2 = (  # (n_experts, hidden_dim, ffn_dim) after the transpose
+            _load_experts(config, f"{prefix}.experts", "w2", weights)
+            .view(self.num_experts, self.ffn_dim, self.hidden_dim)
+            .transpose(1, 2)
+            .contiguous()
+        )
+
+        self.process_group = weights.process_group
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # router_logits: (num_tokens, n_experts)
+        router_logits = self.gate(x)
+        out = fused_moe(
+            x,
+            self.w13,
+            self.w2,
+            router_logits,
+            self.top_k,
+            renormalize=True,
+            inplace=True,
+        )
+        # NOTE(review): inplace=True presumably lets fused_moe overwrite `x`; confirm callers do not reuse it
+        # Sum the per-rank partial outputs across the process group
+        if self.process_group.size() > 1:
+            torch.distributed.all_reduce(out, group=self.process_group)
+
+        return out.view(*x.shape)
+
+
+class DenseMoE(nn.Module):  # MoE feed-forward that evaluates every expert on every token
+    def __init__(self, prefix, config: MixtralConfig, weights):
+        super().__init__()
+        self.hidden_dim = config.hidden_size
+        self.ffn_dim = config.intermediate_size // weights.process_group.size()
+        self.num_experts = config.num_local_experts
+        self.top_k = config.num_experts_per_tok
+
+        act = config.hidden_act
+        if "gelu" in act:
+            self.act = lambda x: torch.nn.functional.gelu(
+                x,
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
+            )
+        elif "silu" in act:
+            self.act = torch.nn.functional.silu
+        else:
+            self.act = ACT2FN[act]
+
+        # Router: a bias-free linear layer loaded from "<prefix>.gate"
+        self.gate = FastLinear.load(config, f"{prefix}.gate", weights, bias=False)
+        # Per-expert projections are kept in plain Python lists (not nn.ModuleList)
+        self.w1 = [
+            TensorParallelColumnLinear.load(
+                config, prefix=f"{prefix}.experts.{i}.w1", weights=weights, bias=False
+            )
+            for i in range(self.num_experts)
+        ]
+        self.w3 = [
+            TensorParallelColumnLinear.load(
+                config, prefix=f"{prefix}.experts.{i}.w3", weights=weights, bias=False
+            )
+            for i in range(self.num_experts)
+        ]
+        self.w2 = [
+            TensorParallelRowLinear.load(
+                config, prefix=f"{prefix}.experts.{i}.w2", weights=weights, bias=False
+            )
+            for i in range(self.num_experts)
+        ]
+
+        self.process_group = weights.process_group
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Run every expert on every token and mix the outputs with the renormalized top-k gate weights.
+        x: (..., model_dim), flattened to (sequence_length, model_dim); returns (sequence_length, hidden_dim)
+        """
+        # Flatten any leading dimensions so x is 2-D
+        input_shape = x.shape
+        x = x.view(-1, input_shape[-1])
+
+        # gate_logits: (sequence_length, n_experts)
+        gate_logits = self.gate(x)
+        # all_probs: (sequence_length, n_experts) and upcast for softmax
+        all_probs = torch.nn.functional.softmax(gate_logits, dim=1, dtype=torch.float)
+
+        if self.top_k < self.num_experts:
+            _, not_selected_experts = torch.topk(
+                all_probs,
+                self.num_experts - self.top_k,
+                largest=False,
+                sorted=False,
+                dim=1,
+            )
+            # Zero the probabilities of the (num_experts - top_k) lowest-scoring experts, in place
+            all_probs.scatter_(1, not_selected_experts, 0)
+
+        # Re-normalize the remaining probabilities per token, then cast back to the input dtype
+        weights = all_probs / all_probs.sum(dim=1, keepdim=True)
+        weights = weights.to(x.dtype)
+
+        # Accumulator for the weighted sum of expert outputs
+        out = x.new_zeros(x.shape[0], self.hidden_dim)
+        for i in range(self.num_experts):
+            h = self.act(self.w1[i](x)) * self.w3[i](x)
+            h = self.w2[i](h, reduce=False)
+            # Scale the expert output by its gate weight (zero for experts masked out above)
+            out += h * weights[:, i].view(-1, 1)
+
+        # Single all_reduce over the accumulated output (w2 was called with reduce=False, presumably deferring it to here)
+        if self.process_group.size() > 1:
+            torch.distributed.all_reduce(out, group=self.process_group)
+
+        return out
+
+
+class MixtralLayer(nn.Module):  # one decoder layer: RMS norm -> attention -> RMS norm -> MoE
+    def __init__(self, prefix, layer_id, config, weights):
+        super().__init__()
+        prefix = f"{prefix}.layers.{layer_id}"
+
+        self.self_attn = MixtralAttention(
+            prefix=f"{prefix}.self_attn", config=config, weights=weights
+        )
+        # Unquantized weights use BlockSparseMoE; any `config.quantize` setting selects DenseMoE
+        moe_cls = BlockSparseMoE if config.quantize is None else DenseMoE
+        self.moe = moe_cls(f"{prefix}.block_sparse_moe", config, weights)
+
+        self.input_layernorm = FastRMSNorm.load(
+            prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps
+        )
+        self.post_attention_layernorm = FastRMSNorm.load(
+            prefix=f"{prefix}.post_attention_layernorm",
+            weights=weights,
+            eps=config.rms_norm_eps,
+        )
+
+    def forward(
+        self,
+        hidden_states,
+        residual,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+        prefill_cache_indices,
+    ):
+        normed_hidden_states, res = self.input_layernorm(hidden_states, residual)
+
+        # Self-attention on the normalized hidden states
+        attn_output = self.self_attn(
+            normed_hidden_states,
+            cos,
+            sin,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+            prefill_cache_indices,
+        )
+
+        # The norm takes the attention output together with the residual and returns both updated
+        normed_attn_res_output, attn_res = self.post_attention_layernorm(
+            attn_output, res
+        )
+
+        moe_output = self.moe(normed_attn_res_output)
+        # The MoE output and the post-attention residual are returned separately
+        return moe_output, attn_res
+
+
+class MixtralModel(torch.nn.Module):  # token embedding, stack of MixtralLayer blocks, final RMS norm
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        # A falsy prefix loads the top-level "model.*" names; otherwise names are nested under "<prefix>.model.*"
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix=(
+                "model.embed_tokens" if not prefix else f"{prefix}.model.embed_tokens"
+            ),
+            weights=weights,
+        )
+
+        self.layers = nn.ModuleList(
+            [
+                MixtralLayer(
+                    "model" if not prefix else f"{prefix}.model",
+                    layer_id,
+                    config,
+                    weights,
+                )
+                for layer_id in range(config.num_hidden_layers)
+            ]
+        )
+        self.norm = FastRMSNorm.load(
+            prefix="model.norm" if not prefix else f"{prefix}.model.norm",
+            weights=weights,
+            eps=config.rms_norm_eps,
+        )
+        # Attention geometry copied from the first layer
+        self.head_size = self.layers[0].self_attn.head_size
+        self.num_heads = self.layers[0].self_attn.num_heads
+        self.num_key_value_heads = self.layers[0].self_attn.num_key_value_heads
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        true_max_s: int,
+        prefill_cache_indices: Optional[torch.Tensor],
+    ) -> torch.Tensor:
+        hidden_states = self.embed_tokens(input_ids)
+
+        # Compute the rotary cos and sin once for this forward pass
+        # so that each layer does not have to index them again
+        cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
+            position_ids, true_max_s, hidden_states.dtype
+        )
+
+        residual = None
+        for i, layer in enumerate(self.layers):
+            hidden_states, residual = layer(
+                hidden_states,
+                residual,
+                cos,
+                sin,
+                cu_seqlen_prefill,
+                kv_cache[i],
+                block_tables,
+                slots,
+                input_lengths,
+                max_s,
+                prefill_cache_indices,
+            )
+
+        hidden_states, _ = self.norm(hidden_states, residual)
+
+        return hidden_states
+
+
+class FlashMixtralForCausalLM(torch.nn.Module):  # MixtralModel followed by the LM head
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        # Weight names are nested under `prefix` when one is given
+        self.model = MixtralModel(prefix, config, weights)
+        self.lm_head = SpeculativeHead.load(
+            config,
+            prefix="lm_head" if not prefix else f"{prefix}.lm_head",
+            weights=weights,
+        )
+        self.max_past = config.sliding_window  # None disables the decode-time clamp in forward()
+        self.max_past_tensor = (  # tensor copy of the window, used as the clamp bound in forward()
+            torch.tensor(config.sliding_window, device=weights.device)
+            if self.max_past is not None
+            else None
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        prefill_cache_indices: Optional[torch.Tensor],
+        lm_head_indices: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        true_max_s = max_s  # max_s is not modified below, so both values passed on are equal
+        if prefill_cache_indices is not None:
+            # Slots also need to be sliced as it has the same size as the whole kv tensor
+            slots = slots[prefill_cache_indices]
+        elif self.max_past is not None:
+            # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
+            # kernel requires the true values
+            input_lengths = torch.clamp(input_lengths, max=self.max_past_tensor)
+
+        hidden_states = self.model(
+            input_ids,
+            position_ids,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+            true_max_s,
+            prefill_cache_indices,
+        )
+        if lm_head_indices is not None:  # keep only the selected rows before the LM head
+            hidden_states = hidden_states[lm_head_indices]
+        logits = self.lm_head(hidden_states)
+        return logits
diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
index eea5f787..ee062d3d 100644
--- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py
@@ -33,7 +33,7 @@ from text_generation_server.utils.layers import (
     TensorParallelRowLinear,
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
-    TensorParallelHead,
+    SpeculativeHead,
     FastLayerNorm,
     PositionRotaryEmbedding,
     get_linear,
@@ -91,6 +91,8 @@ class FlashNeoxAttention(torch.nn.Module):
         self.hidden_size = hidden_size
         self.head_size = hidden_size // num_heads
 
+        self.rotary_dim = int(config.rotary_pct * self.head_size)
+
         if self.num_heads % weights.process_group.size() != 0:
             raise ValueError(
                 f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
@@ -98,8 +100,11 @@ class FlashNeoxAttention(torch.nn.Module):
             )
         self.num_heads = self.num_heads // weights.process_group.size()
 
-        self.rotary_emb = PositionRotaryEmbedding.load(
-            config=config, prefix=f"{prefix}.rotary_emb", weights=weights
+        self.rotary_emb = PositionRotaryEmbedding.static(
+            config=config,
+            dim=self.rotary_dim,
+            base=config.rotary_emb_base,
+            device=weights.device,
         )
 
         self.softmax_scale = self.head_size ** (-0.5)
@@ -182,9 +187,9 @@ class FlashMLP(nn.Module):
             if "gelu" not in act
             else lambda x: torch.nn.functional.gelu(
                 x,
-                approximate="tanh"
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
-                else "none",
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
             )
         )
 
@@ -364,7 +369,7 @@ class FlashGPTNeoXForCausalLM(FlashGPTNeoXPreTrainedModel):
         super().__init__(config)
         self.gpt_neox = FlashGPTNeoXModel(config, weights)
 
-        self.embed_out = TensorParallelHead.load(
+        self.embed_out = SpeculativeHead.load(
             config, prefix="embed_out", weights=weights
         )
 
diff --git a/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
new file mode 100644
index 00000000..cfe447a7
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/flash_phi_modeling.py
@@ -0,0 +1,410 @@
+import torch
+import torch.distributed
+
+from torch import nn
+from transformers.activations import ACT2FN
+from transformers.configuration_utils import PretrainedConfig
+from typing import Optional, List, Tuple
+
+from text_generation_server.utils import paged_attention, flash_attn
+from text_generation_server.utils.layers import (
+    TensorParallelRowLinear,
+    TensorParallelColumnLinear,
+    TensorParallelEmbedding,
+    PositionRotaryEmbedding,
+    SpeculativeHead,
+    get_linear,
+    FastLayerNorm,
+)
+
+
+class PhiConfig(PretrainedConfig):  # Phi hyper-parameters (llama-like, see inline notes)
+    def __init__(
+        self,
+        vocab_size=51200,
+        hidden_size=2560,
+        num_hidden_layers=32,
+        num_attention_heads=32,
+        num_key_value_heads=32,
+        hidden_act="gelu_fast",  # llama uses silu
+        layer_norm_eps=1e-05,  # LayerNorm epsilon (llama uses RMS norm instead)
+        pad_token_id=0,
+        bos_token_id=1,
+        eos_token_id=2,
+        tie_word_embeddings=False,
+        rope_theta=10000.0,  # used as the rotary embedding `base`
+        resid_pdrop=0.1,  # residual dropout probability; llama doesn't have this
+        partial_rotary_factor=0.5,  # fraction of head_size that is rotated; llama rotates all
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.hidden_size = hidden_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.num_key_value_heads = num_key_value_heads
+        self.hidden_act = hidden_act
+        self.layer_norm_eps = layer_norm_eps
+        self.rope_theta = rope_theta
+        self.resid_pdrop = resid_pdrop
+        self.partial_rotary_factor = partial_rotary_factor
+        # Token ids, weight tying and any extra kwargs are forwarded to PretrainedConfig.
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+
+# Same as the llama loader, except that Phi's q/k/v projections use bias=True
+def load_attention(config, prefix, weights):
+    """Build the fused QKV projection; GQA checkpoints are routed to `_load_gqa`."""
+    if config.num_attention_heads != config.num_key_value_heads:
+        return _load_gqa(config, prefix, weights)
+
+    # Equal head counts: load q/k/v (each with bias) as one layer, concatenated on dim 0.
+    qkv_prefixes = [f"{prefix}.{name}" for name in ("q_proj", "k_proj", "v_proj")]
+    fused_qkv = TensorParallelColumnLinear.load_multi(
+        config, prefixes=qkv_prefixes, dim=0, weights=weights, bias=True
+    )
+    return fused_qkv
+
+
+def _load_gqa(config, prefix: str, weights):
+    """Load the fused, sharded QKV projection (weight + bias) for GQA checkpoints."""
+    assert config.hidden_size % config.num_attention_heads == 0
+    assert config.num_attention_heads % weights.process_group.size() == 0
+
+    prefixes = [f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"]
+    weight = weights.get_multi_weights_col(
+        prefixes=prefixes, quantize=config.quantize, dim=0
+    )
+
+    if config.quantize not in ["gptq", "awq"]:
+        weight = weight.to(dtype=weights.dtype).to(device=weights.device)
+
+        head_size = config.hidden_size // config.num_attention_heads
+        num_heads = config.num_attention_heads // weights.process_group.size()
+        num_key_value_heads = config.num_key_value_heads // weights.process_group.size()
+        expected = [(num_heads + 2 * num_key_value_heads) * head_size, config.hidden_size]
+        assert list(weight.shape) == expected, f"{list(weight.shape)} != {expected}"
+
+    # Phi has biases: pass the loaded tensor (as flash_qwen2_modeling does), not the flag `True`.
+    biases = [weights.get_sharded(f"{p}.bias", dim=0) for p in prefixes]
+    bias = torch.cat(biases, dim=0).to(dtype=weights.dtype).to(device=weights.device)
+    return TensorParallelColumnLinear(
+        get_linear(weight, bias=bias, quantize=config.quantize)
+    )
+
+
+class FlashPhiAttention(torch.nn.Module):  # Phi attention with partial rotary embeddings
+    def __init__(
+        self,
+        prefix: str,
+        config,
+        weights,
+    ):
+        super().__init__()
+        self.num_heads = config.num_attention_heads
+        self.hidden_size = config.hidden_size
+        self.head_size = self.hidden_size // self.num_heads
+
+        self.softmax_scale = self.head_size**-0.5
+        self.rotary_dim = int(config.partial_rotary_factor * self.head_size)
+
+        self.rotary_emb = PositionRotaryEmbedding.static(
+            config=config,
+            dim=self.rotary_dim,
+            base=config.rope_theta,
+            device=weights.device,
+        )
+
+        if self.num_heads % weights.process_group.size() != 0:
+            raise ValueError(
+                f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
+                f"and `num_shards`: {weights.process_group.size()})"
+            )
+        # From here on the head counts are per shard.
+        self.num_heads = self.num_heads // weights.process_group.size()
+        self.num_key_value_heads = (
+            config.num_key_value_heads // weights.process_group.size()
+        )
+
+        self.query_key_value = load_attention(config, prefix, weights)
+
+        # in llama the dense layer is called "o_proj" and has bias=False
+        self.dense = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.dense",
+            weights=weights,
+            bias=True,
+        )
+        self.num_groups = self.num_heads // self.num_key_value_heads  # query heads per KV head
+        self.kv_head_mapping = torch.arange(
+            0, self.num_key_value_heads, dtype=torch.int32, device=weights.device
+        ).repeat_interleave(self.num_groups)
+
+    def forward(
+        self,
+        hidden_states,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        # Compute the fused projection and split it into query and key/value parts
+        qkv = self.query_key_value(hidden_states)
+        query, kv = qkv.split(
+            [
+                self.head_size * self.num_heads,
+                2 * self.head_size * self.num_key_value_heads,
+            ],
+            dim=1,
+        )
+
+        # Reshape query and key for rotary embeddings
+        query = query.view(-1, self.num_heads, self.head_size)
+        kv = kv.view(-1, 2, self.num_key_value_heads, self.head_size)
+
+        # NOTE: this is the main difference between Llama and Phi.
+        # In llama the rotary embeddings are applied to the whole query and key;
+        # Phi uses PARTIAL rotary embeddings, applied only to the first
+        # `self.rotary_dim` features of each head (the slices below).
+        # Apply partial positional embeddings in place
+        self.rotary_emb(
+            query[:, :, : self.rotary_dim], kv[:, 0, :, : self.rotary_dim], cos, sin
+        )
+
+        # Hand key (kv[:, 0]) and value (kv[:, 1]) to the paged KV cache at `slots`
+        paged_attention.reshape_and_cache(
+            kv[:, 0], kv[:, 1], kv_cache[0], kv_cache[1], slots
+        )
+
+        # Output buffer passed to the attention kernels below
+        attn_output = torch.empty_like(query)
+
+        # Prefill: `cu_seqlen_prefill` is set, attend over this batch's own keys/values
+        if cu_seqlen_prefill is not None:
+            flash_attn.attention(
+                query,
+                torch.select(kv, dim=1, index=0),
+                torch.select(kv, dim=1, index=1),
+                attn_output,
+                cu_seqlen_prefill,
+                max_s,
+                self.softmax_scale,
+            )
+        # Decode: keys/values come from the paged KV cache
+        else:
+            paged_attention.attention(
+                attn_output,
+                query,
+                kv_cache[0],
+                kv_cache[1],
+                self.kv_head_mapping,
+                self.softmax_scale,
+                block_tables,
+                input_lengths,
+                max_s,
+            )
+
+        return self.dense(attn_output.view(-1, self.num_heads * self.head_size))
+
+
+class PhiMLP(nn.Module):  # Phi feed-forward block: fc1 -> activation -> fc2
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        act = config.hidden_act
+        self.act = (
+            ACT2FN[act]
+            if "gelu" not in act
+            else lambda x: torch.nn.functional.gelu(
+                x,
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
+            )
+        )
+        # fc1 must be column-parallel so that the row-parallel fc2 can consume its output
+        # (llama names these weights up_proj / down_proj and loads them with bias=False)
+        self.up_proj = TensorParallelColumnLinear.load(
+            config,
+            prefix=f"{prefix}.fc1",
+            weights=weights,
+            bias=True,
+        )
+        self.down_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.fc2",
+            weights=weights,
+            bias=True,
+        )
+
+    def forward(self, hidden_states):
+        # NOTE: Llama reshapes its fused gate/up states to the intermediate size;
+        # Phi has no gate, so fc1 -> activation -> fc2 needs no `view` operation
+        return self.down_proj(self.act(self.up_proj(hidden_states)))
+
+
+class FlashPhiLayer(nn.Module):  # one Phi block: layer norm, then attention and MLP side by side
+    def __init__(self, layer_id, config, weights):
+        super().__init__()
+        prefix = f"model.layers.{layer_id}"
+        self.self_attn = FlashPhiAttention(
+            prefix=f"{prefix}.self_attn", config=config, weights=weights
+        )
+        self.mlp = PhiMLP(prefix=f"{prefix}.mlp", config=config, weights=weights)
+        self.input_layernorm = FastLayerNorm.load(
+            prefix=f"{prefix}.input_layernorm",
+            weights=weights,
+            eps=config.layer_norm_eps,
+        )
+        self.resid_dropout = torch.nn.Dropout(config.resid_pdrop)
+
+    def forward(
+        self,
+        hidden_states,
+        residual,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+    ):
+        hidden_states, res = self.input_layernorm(hidden_states, residual)
+        # Self-attention on the normed hidden states
+        attn_output = self.self_attn(
+            hidden_states,
+            cos,
+            sin,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+        # The MLP reads the same normed input as attention; both outputs go through dropout and are summed.
+        hidden_states = self.resid_dropout(attn_output).add(
+            self.resid_dropout(self.mlp(hidden_states))
+        )
+        # Return the summed branch outputs plus the residual handed back by the layer norm.
+        return hidden_states, res
+
+
+class FlashPhiModel(torch.nn.Module):  # token embedding -> Phi layers -> final layer norm
+    def __init__(self, config, weights):
+        super().__init__()
+        # Rank and size of the process group the weights are sharded over.
+        process_group = weights.process_group
+        self.tp_rank = process_group.rank()
+        self.tp_world_size = process_group.size()
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix="model.embed_tokens", weights=weights
+        )
+        self.layers = nn.ModuleList(
+            [
+                FlashPhiLayer(
+                    layer_id,
+                    config,
+                    weights,
+                )
+                for layer_id in range(config.num_hidden_layers)
+            ]
+        )
+        self.gradient_checkpointing = False
+        # Mirror the first layer's attention geometry (per-shard head counts) on the model.
+        self.head_size = self.layers[0].self_attn.head_size
+        self.num_heads = self.layers[0].self_attn.num_heads
+        self.num_key_value_heads = self.layers[0].self_attn.num_key_value_heads
+
+        self.norm = FastLayerNorm.load(
+            prefix="model.final_layernorm",
+            weights=weights,
+            eps=config.layer_norm_eps,
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+    ) -> torch.Tensor:
+        hidden_states = self.embed_tokens(input_ids)
+
+        # Compute the rotary cos and sin once for this forward pass,
+        # using the first layer's rotary module, instead of indexing in each layer
+        cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
+            position_ids, max_s, hidden_states.dtype
+        )
+        # The first layer gets no residual; each layer returns the one passed to the next.
+        residual = None
+        for i, layer in enumerate(self.layers):
+            hidden_states, residual = layer(
+                hidden_states,
+                residual,
+                cos,
+                sin,
+                cu_seqlen_prefill,
+                kv_cache[i],
+                block_tables,
+                slots,
+                input_lengths,
+                max_s,
+            )
+        # Final layer norm; its second return value is discarded.
+        hidden_states, _ = self.norm(hidden_states, residual)
+
+        return hidden_states
+
+
+class FlashPhiForCausalLM(torch.nn.Module):  # FlashPhiModel backbone plus output head
+    def __init__(self, config, weights):
+        super().__init__()
+        # Backbone, followed by the head loaded from the "lm_head" weights.
+        self.model = FlashPhiModel(config, weights)
+        self.lm_head = SpeculativeHead.load(
+            config,
+            prefix="lm_head",
+            weights=weights,
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        lm_head_indices: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        hidden_states = self.model(
+            input_ids,
+            position_ids,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+        )
+        if lm_head_indices is not None:  # keep only the selected rows for the head
+            hidden_states = hidden_states[lm_head_indices]
+        # NOTE(review): annotated as Tensor; confirm what SpeculativeHead actually returns.
+        return self.lm_head(hidden_states)
diff --git a/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
new file mode 100644
index 00000000..94023b33
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
@@ -0,0 +1,400 @@
+import torch
+import torch.distributed
+
+from torch import nn
+from transformers.activations import ACT2FN
+from typing import Optional, List, Tuple
+
+from text_generation_server.utils import paged_attention, flash_attn
+from text_generation_server.utils.layers import (
+    TensorParallelRowLinear,
+    TensorParallelColumnLinear,
+    TensorParallelEmbedding,
+    PositionRotaryEmbedding,
+    SpeculativeHead,
+    get_linear,
+    FastRMSNorm,
+)
+
+
+def load_attention(config, prefix, weights):
+    """Return the fused QKV linear; unequal Q/KV head counts defer to `_load_gqa`."""
+    if config.num_attention_heads != config.num_key_value_heads:
+        return _load_gqa(config, prefix, weights)
+
+    # Same number of query and key/value heads: load q, k, v (with bias) as one layer.
+    qkv_prefixes = [f"{prefix}.{name}" for name in ("q_proj", "k_proj", "v_proj")]
+    fused_qkv = TensorParallelColumnLinear.load_multi(
+        config, prefixes=qkv_prefixes, dim=0, weights=weights, bias=True
+    )
+    return fused_qkv
+
+
+def _load_gqa(config, prefix: str, weights):
+    """Load the fused, sharded QKV projection (weight + bias) for GQA checkpoints."""
+    assert config.hidden_size % config.num_attention_heads == 0
+    assert config.num_attention_heads % weights.process_group.size() == 0
+
+    prefixes = [f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"]
+    weight = weights.get_multi_weights_col(
+        prefixes=prefixes, quantize=config.quantize, dim=0
+    )
+
+    if config.quantize not in ["gptq", "awq"]:
+        weight = weight.to(dtype=weights.dtype).to(device=weights.device)
+
+        # The check is per shard, so the failure message reports per-shard head counts too.
+        head_size = config.hidden_size // config.num_attention_heads
+        num_heads = config.num_attention_heads // weights.process_group.size()
+        num_key_value_heads = config.num_key_value_heads // weights.process_group.size()
+        expected_shape = [
+            (num_heads + 2 * num_key_value_heads) * head_size,
+            config.hidden_size,
+        ]
+        assert list(weight.shape) == expected_shape, f"{list(weight.shape)} != {expected_shape}"
+
+    # Concatenate the sharded q/k/v biases in the same order as the fused weight.
+    shards = [weights.get_sharded(f"{p}.bias", dim=0) for p in prefixes]
+    bias = torch.cat(shards, dim=0).to(dtype=weights.dtype).to(device=weights.device)
+
+    return TensorParallelColumnLinear(
+        get_linear(weight, bias=bias, quantize=config.quantize)
+    )
+
+
+class Qwen2Attention(torch.nn.Module):  # Qwen2 attention: fused QKV, rotary, sliding window
+    def __init__(
+        self,
+        prefix: str,
+        config,
+        weights,
+    ):
+        super().__init__()
+        self.max_past = (  # sliding window size, or -1 when the config has none
+            config.sliding_window if config.sliding_window is not None else -1
+        )
+        self.num_heads = config.num_attention_heads
+        self.hidden_size = config.hidden_size
+        self.head_size = self.hidden_size // self.num_heads
+
+        self.rotary_emb = PositionRotaryEmbedding.static(
+            config=config,
+            dim=self.head_size,
+            base=config.rope_theta,
+            device=weights.device,
+        )
+
+        self.softmax_scale = self.head_size**-0.5
+
+        if self.num_heads % weights.process_group.size() != 0:
+            raise ValueError(
+                f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
+                f"and `num_shards`: {weights.process_group.size()})"
+            )
+        self.num_heads = self.num_heads // weights.process_group.size()
+        self.num_key_value_heads = (
+            config.num_key_value_heads // weights.process_group.size()
+        )
+
+        self.query_key_value = load_attention(config, prefix, weights)
+
+        self.o_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.o_proj",
+            weights=weights,
+            bias=False,
+        )
+        self.num_groups = self.num_heads // self.num_key_value_heads  # query heads per KV head
+        self.kv_head_mapping = torch.arange(
+            0, self.num_key_value_heads, dtype=torch.int32, device=weights.device
+        ).repeat_interleave(self.num_groups)
+
+    def forward(
+        self,
+        hidden_states,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+        prefill_cache_indices,
+    ):
+        qkv = self.query_key_value(hidden_states)
+        query, kv = qkv.split(
+            [
+                self.head_size * self.num_heads,
+                2 * self.head_size * self.num_key_value_heads,
+            ],
+            dim=1,
+        )
+        query = query.view(-1, self.num_heads, self.head_size)
+        kv = kv.view(-1, 2, self.num_key_value_heads, self.head_size)
+        # Rotary embeddings go on the query and on the key slice (index 0) of `kv`.
+        self.rotary_emb(query, torch.select(kv, dim=1, index=0), cos, sin)
+        # When `prefill_cache_indices` is given, only those rows are handed to the KV cache.
+        if prefill_cache_indices is not None:
+            kv_to_cache = kv[prefill_cache_indices]
+        else:
+            kv_to_cache = kv
+
+        paged_attention.reshape_and_cache(
+            kv_to_cache[:, 0], kv_to_cache[:, 1], kv_cache[0], kv_cache[1], slots
+        )
+
+        # Output buffer passed to the attention kernels below
+        attn_output = torch.empty_like(query)
+
+        # Prefill: `cu_seqlen_prefill` is set, attend over this batch's own keys/values
+        if cu_seqlen_prefill is not None:
+            # flash attention, with the sliding window passed as `window_size_left`
+            flash_attn.attention(
+                query,
+                torch.select(kv, dim=1, index=0),
+                torch.select(kv, dim=1, index=1),
+                attn_output,
+                cu_seqlen_prefill,
+                max_s,
+                self.softmax_scale,
+                window_size_left=self.max_past,
+            )
+        # Decode: keys/values come from the paged KV cache
+        else:
+            paged_attention.attention(
+                attn_output,
+                query,
+                kv_cache[0],
+                kv_cache[1],
+                self.kv_head_mapping,
+                self.softmax_scale,
+                block_tables,
+                input_lengths,
+                max_s,
+            )
+
+        return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size))
+
+
+class Qwen2MLP(nn.Module):  # gated MLP: down_proj(act(gate) * up)
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        act = config.hidden_act
+        self.act = (
+            ACT2FN[act]
+            if "gelu" not in act
+            else lambda x: torch.nn.functional.gelu(
+                x,
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
+            )
+        )
+        # Fuse gate_proj and up_proj into a single column-parallel linear (no bias)
+        self.gate_up_proj = TensorParallelColumnLinear.load_multi(
+            config,
+            prefixes=[f"{prefix}.gate_proj", f"{prefix}.up_proj"],
+            weights=weights,
+            dim=0,
+            bias=False,
+        )
+        self.down_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.down_proj",
+            weights=weights,
+            bias=False,
+        )
+        self.intermediate_size = (  # intermediate width divided by the number of shards
+            config.intermediate_size // weights.process_group.size()
+        )
+
+    def forward(self, hidden_states):
+        gate_up_states = self.gate_up_proj(hidden_states)
+        gate_up_states = gate_up_states.view(-1, 2, self.intermediate_size)  # [:, 0]=gate, [:, 1]=up
+        return self.down_proj(self.act(gate_up_states[:, 0]) * gate_up_states[:, 1])
+
+
+class Qwen2Layer(nn.Module):  # one Qwen2 block: norm -> attention -> norm -> MLP
+    def __init__(self, layer_id, config, weights):
+        super().__init__()
+        prefix = f"model.layers.{layer_id}"
+        self.self_attn = Qwen2Attention(
+            prefix=f"{prefix}.self_attn", config=config, weights=weights
+        )
+        self.mlp = Qwen2MLP(prefix=f"{prefix}.mlp", config=config, weights=weights)
+        self.input_layernorm = FastRMSNorm.load(
+            prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.rms_norm_eps
+        )
+        self.post_attention_layernorm = FastRMSNorm.load(
+            prefix=f"{prefix}.post_attention_layernorm",
+            weights=weights,
+            eps=config.rms_norm_eps,
+        )
+
+    def forward(
+        self,
+        hidden_states,
+        residual,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+        prefill_cache_indices,
+    ):
+        normed_hidden_states, res = self.input_layernorm(hidden_states, residual)
+
+        # Self-attention on the normed hidden states
+        attn_output = self.self_attn(
+            normed_hidden_states,
+            cos,
+            sin,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+            prefill_cache_indices,
+        )
+
+        # Post-attention RMS norm: takes the attention output together with `res`
+        normed_attn_res_output, attn_res = self.post_attention_layernorm(
+            attn_output, res
+        )
+
+        mlp_output = self.mlp(normed_attn_res_output)
+        # Return the MLP output together with the residual from the post-attention norm.
+        return mlp_output, attn_res
+
+
+class Qwen2Model(torch.nn.Module):  # token embedding -> Qwen2 layers -> final RMS norm
+    def __init__(self, config, weights):
+        super().__init__()
+        process_group = weights.process_group
+        self.tp_rank = process_group.rank()
+        self.tp_world_size = process_group.size()
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix="model.embed_tokens", weights=weights
+        )
+        self.layers = nn.ModuleList(
+            [
+                Qwen2Layer(
+                    layer_id,
+                    config,
+                    weights,
+                )
+                for layer_id in range(config.num_hidden_layers)
+            ]
+        )
+        self.norm = FastRMSNorm.load(
+            prefix="model.norm", weights=weights, eps=config.rms_norm_eps
+        )
+
+        self.gradient_checkpointing = False
+        # Mirror the first layer's attention geometry (per-shard head counts) on the model.
+        self.head_size = self.layers[0].self_attn.head_size
+        self.num_heads = self.layers[0].self_attn.num_heads
+        self.num_key_value_heads = self.layers[0].self_attn.num_key_value_heads
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        true_max_s: int,
+        prefill_cache_indices: Optional[torch.Tensor],
+    ) -> torch.Tensor:
+        hidden_states = self.embed_tokens(input_ids)
+
+        # Compute the rotary cos and sin once for this forward pass (from `true_max_s`),
+        # using the first layer's rotary module, instead of indexing in each layer
+        cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
+            position_ids, true_max_s, hidden_states.dtype
+        )
+        # The first layer gets no residual; each layer returns the one passed to the next.
+        residual = None
+        for i, layer in enumerate(self.layers):
+            hidden_states, residual = layer(
+                hidden_states,
+                residual,
+                cos,
+                sin,
+                cu_seqlen_prefill,
+                kv_cache[i],
+                block_tables,
+                slots,
+                input_lengths,
+                max_s,
+                prefill_cache_indices,
+            )
+        # Final RMS norm; its second return value is discarded.
+        hidden_states, _ = self.norm(hidden_states, residual)
+
+        return hidden_states
+
+
+class Qwen2ForCausalLM(torch.nn.Module):  # Qwen2Model backbone plus output head
+    def __init__(self, config, weights):
+        super().__init__()
+        # Backbone, followed by the head loaded from the "lm_head" weights.
+        self.model = Qwen2Model(config, weights)
+        self.lm_head = SpeculativeHead.load(
+            config,
+            prefix="lm_head",
+            weights=weights,
+        )
+        self.max_past = config.sliding_window  # None when the config has no sliding window
+        self.max_past_tensor = (
+            torch.tensor(config.sliding_window, device=weights.device)
+            if self.max_past is not None
+            else None
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        prefill_cache_indices: Optional[torch.Tensor] = None,
+        lm_head_indices: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        true_max_s = max_s  # passed on separately; Qwen2Model uses it for the rotary cos/sin
+        if prefill_cache_indices is not None:
+            # Slots also need to be sliced as it has the same size as the whole kv tensor
+            slots = slots[prefill_cache_indices]
+        elif self.max_past is not None:
+            # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
+            # kernel requires the true values
+            input_lengths = torch.clamp(input_lengths, max=self.max_past_tensor)
+
+        hidden_states = self.model(
+            input_ids,
+            position_ids,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+            true_max_s,
+            prefill_cache_indices,
+        )
+        if lm_head_indices is not None:  # keep only the selected rows for the head
+            hidden_states = hidden_states[lm_head_indices]
+        logits = self.lm_head(hidden_states)
+        return logits
diff --git a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
index 6a530f3c..a9127d1f 100644
--- a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py
@@ -12,7 +12,7 @@ from text_generation_server.utils.layers import (
     TensorParallelRowLinear,
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
-    TensorParallelHead,
+    SpeculativeHead,
     FastLayerNorm,
     PositionRotaryEmbedding,
     get_linear,
@@ -613,9 +613,7 @@ class FlashRWForCausalLM(FlashRWPreTrainedModel):
 
         self.transformer = FlashRWModel(config, weights)
 
-        self.lm_head = TensorParallelHead.load(
-            config, prefix="lm_head", weights=weights
-        )
+        self.lm_head = SpeculativeHead.load(config, prefix="lm_head", weights=weights)
 
     def forward(
         self,
diff --git a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py
index c3c7617a..bbb603a7 100644
--- a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py
@@ -6,11 +6,10 @@ from transformers.activations import ACT2FN
 from typing import Optional, List, Tuple
 
 from text_generation_server.utils import paged_attention, flash_attn
-from text_generation_server.utils.flash_attn import attention
 from text_generation_server.utils.layers import (
     TensorParallelRowLinear,
     TensorParallelColumnLinear,
-    TensorParallelHead,
+    SpeculativeHead,
     TensorParallelEmbedding,
     FastLayerNorm,
     get_linear,
@@ -70,9 +69,22 @@ def _load_multi_mqa_gptq(
         qzeros = torch.cat([q_tensor, kv_tensor], dim=1)
         qzeros = qzeros.to(device=weights.device)
 
-        g_idx = weights.get_tensor(f"{prefix}.c_attn.g_idx")
-        g_idx = g_idx.to(device=weights.device)
-        bits, groupsize = weights._get_gptq_params()
+        (
+            bits,
+            groupsize,
+            _,
+            quant_method,
+        ) = weights._get_gptq_params()
+        if quant_method == "gptq":
+            g_idx = weights.get_tensor(f"{prefix}.c_attn.g_idx")
+            g_idx = g_idx.to(device=weights.device)
+        elif quant_method == "awq":
+            g_idx = None
+            from text_generation_server.utils.awq.conversion_utils import (
+                fast_awq_to_gptq,
+            )
+
+            qweight, qzeros = fast_awq_to_gptq(qweight, qzeros)
 
         from text_generation_server.utils.layers import HAS_EXLLAMA
 
@@ -299,9 +311,9 @@ class MLP(nn.Module):
             if "gelu" not in act
             else lambda x: torch.nn.functional.gelu(
                 x,
-                approximate="tanh"
-                if act in ["gelu_fast", "gelu_pytorch_tanh"]
-                else "none",
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
             )
         )
 
@@ -441,7 +453,7 @@ class FlashSantacoderForCausalLM(nn.Module):
     def __init__(self, config, weights):
         super().__init__()
         self.transformer = FlashSantacoderModel(config, weights)
-        self.lm_head = TensorParallelHead.load(
+        self.lm_head = SpeculativeHead.load(
             config, prefix="transformer.wte", weights=weights
         )
 
diff --git a/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
new file mode 100644
index 00000000..ed77af78
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
@@ -0,0 +1,545 @@
+# coding=utf-8
+# Copyright 2024 Starcoder2 AI and the HuggingFace Inc. team. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torch.distributed
+
+from torch import nn
+from transformers.activations import ACT2FN
+from transformers.configuration_utils import PretrainedConfig
+from typing import Optional, List, Tuple
+
+from text_generation_server.utils import paged_attention, flash_attn
+from text_generation_server.utils.layers import (
+    TensorParallelRowLinear,
+    TensorParallelColumnLinear,
+    TensorParallelEmbedding,
+    PositionRotaryEmbedding,
+    SpeculativeHead,
+    get_linear,
+    FastRMSNorm,
+    FastLayerNorm,
+)
+
+
+class Starcoder2Config(PretrainedConfig):
+    model_type = "starcoder2"
+
+    def __init__(
+        self,
+        vocab_size=49152,
+        hidden_size=3072,
+        intermediate_size=12288,
+        num_hidden_layers=30,
+        num_attention_heads=24,
+        num_key_value_heads=2,
+        mlp_type="default",
+        hidden_act="gelu_pytorch_tanh",
+        max_position_embeddings=4096,
+        initializer_range=0.018042,
+        norm_type="layer_norm",
+        norm_epsilon=1e-5,
+        use_cache=True,
+        bos_token_id=50256,
+        eos_token_id=50256,
+        rope_theta=10000.0,
+        sliding_window=None,
+        attention_dropout=0.0,
+        residual_dropout=0.0,
+        embedding_dropout=0.0,
+        use_bias: bool = True,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.max_position_embeddings = max_position_embeddings
+        self.hidden_size = hidden_size
+        self.intermediate_size = intermediate_size
+        self.num_hidden_layers = num_hidden_layers
+        self.num_attention_heads = num_attention_heads
+        self.sliding_window = sliding_window  # None means no sliding window
+        self.use_bias = use_bias
+
+        # for backward compatibility: without an explicit KV head count, use one KV head per attention head
+        if num_key_value_heads is None:
+            num_key_value_heads = num_attention_heads
+
+        self.num_key_value_heads = num_key_value_heads
+        self.mlp_type = mlp_type  # key into STARCODER2_MLP_CLASSES ("default" or "gated")
+        self.hidden_act = hidden_act
+        self.initializer_range = initializer_range
+        self.norm_type = norm_type  # key into STARCODER2_NORMALIZATION_CLASSES
+        self.norm_epsilon = norm_epsilon
+        self.use_cache = use_cache
+        self.rope_theta = rope_theta
+        self.attention_dropout = attention_dropout
+        self.residual_dropout = residual_dropout
+        self.embedding_dropout = embedding_dropout
+
+        super().__init__(  # token ids and any extra kwargs are forwarded to PretrainedConfig
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            **kwargs,
+        )
+
+
+def load_attention(config, prefix, weights):
+    """Load the q/k/v projection; `_load_gqa` handles the case where the number of
+    KV heads differs from the number of attention heads."""
+    if config.num_attention_heads != config.num_key_value_heads:
+        return _load_gqa(config, prefix, weights)
+
+    # Equal head counts: load the three projections together through `load_multi`.
+    qkv_prefixes = [f"{prefix}.{proj}" for proj in ("q_proj", "k_proj", "v_proj")]
+    return TensorParallelColumnLinear.load_multi(
+        config, prefixes=qkv_prefixes, dim=0, weights=weights, bias=config.use_bias
+    )
+
+
+def _load_gqa(config, prefix: str, weights):
+    """Load the concatenated q/k/v projection as a `TensorParallelColumnLinear`;
+    unquantized weights are shape-checked against the per-shard head counts."""
+    assert config.hidden_size % config.num_attention_heads == 0
+    assert config.num_attention_heads % weights.process_group.size() == 0
+
+    qkv_prefixes = [f"{prefix}.q_proj", f"{prefix}.k_proj", f"{prefix}.v_proj"]
+    weight = weights.get_multi_weights_col(
+        prefixes=qkv_prefixes, quantize=config.quantize, dim=0
+    )
+
+    if config.quantize not in ["gptq", "awq"]:
+        weight = weight.to(dtype=weights.dtype).to(device=weights.device)
+        # Built from the per-shard head counts so the check and its message cannot drift.
+        head_size = config.hidden_size // config.num_attention_heads
+        num_heads = config.num_attention_heads // weights.process_group.size()
+        num_key_value_heads = config.num_key_value_heads // weights.process_group.size()
+        expected_shape = [
+            (num_heads + 2 * num_key_value_heads) * head_size,
+            config.hidden_size,
+        ]
+        assert list(weight.shape) == expected_shape, (
+            f"{list(weight.shape)} != {expected_shape}"
+        )
+
+    bias = None
+    if config.use_bias:
+        parts = [weights.get_sharded(f"{p}.bias", dim=0) for p in qkv_prefixes]
+        bias = torch.cat(parts, dim=0).to(dtype=weights.dtype).to(device=weights.device)
+
+    return TensorParallelColumnLinear(
+        get_linear(weight, bias=bias, quantize=config.quantize)
+    )
+
+
+class Starcoder2Attention(torch.nn.Module):
+    def __init__(
+        self,
+        prefix: str,
+        config,
+        weights,
+    ):
+        super().__init__()
+        self.max_past = (  # -1 when no sliding window is configured
+            config.sliding_window if config.sliding_window is not None else -1
+        )
+        self.num_heads = config.num_attention_heads
+        self.hidden_size = config.hidden_size
+        self.head_size = self.hidden_size // self.num_heads  # uses the unsharded head count
+
+        self.rotary_emb = PositionRotaryEmbedding.static(
+            config=config,
+            dim=self.head_size,
+            base=config.rope_theta,
+            device=weights.device,
+        )
+
+        self.softmax_scale = self.head_size**-0.5  # i.e. 1 / sqrt(head_size)
+
+        if self.num_heads % weights.process_group.size() != 0:
+            raise ValueError(
+                f"`num_heads` must be divisible by `num_shards` (got `num_heads`: {self.num_heads} "
+                f"and `num_shards`: {weights.process_group.size()}"
+            )
+        self.num_heads = self.num_heads // weights.process_group.size()  # heads on this shard
+        self.num_key_value_heads = (  # NOTE(review): 0 when KV heads < shard count, which breaks num_groups below — confirm
+            config.num_key_value_heads // weights.process_group.size()
+        )
+
+        self.query_key_value = load_attention(config, prefix, weights)
+
+        self.o_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.o_proj",
+            weights=weights,
+            bias=config.use_bias,
+        )
+        self.num_groups = self.num_heads // self.num_key_value_heads  # query heads per KV head
+        self.kv_head_mapping = torch.arange(  # each KV head index repeated num_groups times
+            0, self.num_key_value_heads, dtype=torch.int32, device=weights.device
+        ).repeat_interleave(self.num_groups)
+
+    def forward(
+        self,
+        hidden_states,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+        prefill_cache_indices,
+    ):
+        qkv = self.query_key_value(hidden_states)
+        query, kv = qkv.split(  # query columns first, then the packed key/value columns
+            [
+                self.head_size * self.num_heads,
+                2 * self.head_size * self.num_key_value_heads,
+            ],
+            dim=1,
+        )
+        query = query.view(-1, self.num_heads, self.head_size)
+        kv = kv.view(-1, 2, self.num_key_value_heads, self.head_size)
+        # Rotary on query and kv[:, 0]; the return value is unused, so presumably in place.
+        self.rotary_emb(query, torch.select(kv, dim=1, index=0), cos, sin)
+
+        if prefill_cache_indices is not None:
+            kv_to_cache = kv[prefill_cache_indices]  # only the indexed rows are passed on
+        else:
+            kv_to_cache = kv
+
+        paged_attention.reshape_and_cache(
+            kv_to_cache[:, 0], kv_to_cache[:, 1], kv_cache[0], kv_cache[1], slots
+        )
+
+        # output tensor, allocated with the same shape and dtype as query
+        attn_output = torch.empty_like(query)
+
+        # Prefill
+        if cu_seqlen_prefill is not None:
+            # flash attention over the key/value just computed (not the cache)
+            flash_attn.attention(
+                query,
+                torch.select(kv, dim=1, index=0),
+                torch.select(kv, dim=1, index=1),
+                attn_output,
+                cu_seqlen_prefill,
+                max_s,
+                self.softmax_scale,
+                window_size_left=self.max_past,
+            )
+        # Decode: attention is given kv_cache and block_tables instead of the fresh kv
+        else:
+            paged_attention.attention(
+                attn_output,
+                query,
+                kv_cache[0],
+                kv_cache[1],
+                self.kv_head_mapping,
+                self.softmax_scale,
+                block_tables,
+                input_lengths,
+                max_s,
+            )
+
+        return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size))
+
+
+class Starcoder2MLP(nn.Module):
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        act = config.hidden_act
+        self.act = (  # gelu variants call torch's gelu directly; other names go through ACT2FN
+            ACT2FN[act]
+            if "gelu" not in act
+            else lambda x: torch.nn.functional.gelu(
+                x,
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
+            )
+        )
+        # First projection (c_fc), applied before the activation; nothing is fused in this MLP
+        self.c_fc = TensorParallelColumnLinear.load(
+            config,
+            prefix=f"{prefix}.c_fc",
+            weights=weights,
+            bias=config.use_bias,
+        )
+        self.c_proj = TensorParallelRowLinear.load(  # second projection, applied after the activation
+            config,
+            prefix=f"{prefix}.c_proj",
+            weights=weights,
+            bias=config.use_bias,
+        )
+
+    def forward(self, hidden_states):  # c_proj(act(c_fc(x)))
+        hidden_states = self.c_fc(hidden_states)
+        hidden_states = self.act(hidden_states)
+        return self.c_proj(hidden_states)
+
+
+class Starcoder2GatedMLP(nn.Module):
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        act = config.hidden_act
+        self.act = (  # gelu variants call torch's gelu directly; other names go through ACT2FN
+            ACT2FN[act]
+            if "gelu" not in act
+            else lambda x: torch.nn.functional.gelu(
+                x,
+                approximate=(
+                    "tanh" if act in ["gelu_fast", "gelu_pytorch_tanh"] else "none"
+                ),
+            )
+        )
+        # Fuse gate and up proj: both are loaded into a single linear layer
+        self.gate_up_proj = TensorParallelColumnLinear.load_multi(
+            config,
+            prefixes=[f"{prefix}.gate_proj", f"{prefix}.up_proj"],
+            weights=weights,
+            dim=0,
+            bias=config.use_bias,
+        )
+        self.down_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.down_proj",
+            weights=weights,
+            bias=config.use_bias,
+        )
+        self.intermediate_size = (  # intermediate width divided by the process group size
+            config.intermediate_size // weights.process_group.size()
+        )
+
+    def forward(self, hidden_states):  # down_proj(act(first half) * second half)
+        gate_up_states = self.gate_up_proj(hidden_states)
+        gate_up_states = gate_up_states.view(-1, 2, self.intermediate_size)
+        return self.down_proj(self.act(gate_up_states[:, 0]) * gate_up_states[:, 1])
+
+
+STARCODER2_NORMALIZATION_CLASSES = {  # looked up with config.norm_type
+    "layer_norm": FastLayerNorm,
+    "rms_norm": FastRMSNorm,
+}
+
+STARCODER2_MLP_CLASSES = {  # looked up with config.mlp_type
+    "default": Starcoder2MLP,
+    "gated": Starcoder2GatedMLP,
+}
+
+
+class Starcoder2Layer(nn.Module):
+    def __init__(self, layer_id, config, weights):
+        super().__init__()
+        prefix = f"model.layers.{layer_id}"  # weight-name prefix shared by this layer's modules
+        self.self_attn = Starcoder2Attention(
+            prefix=f"{prefix}.self_attn", config=config, weights=weights
+        )
+
+        self.mlp = STARCODER2_MLP_CLASSES[config.mlp_type](
+            prefix=f"{prefix}.mlp", config=config, weights=weights
+        )
+
+        self.input_layernorm = STARCODER2_NORMALIZATION_CLASSES[config.norm_type].load(
+            prefix=f"{prefix}.input_layernorm", weights=weights, eps=config.norm_epsilon
+        )
+        self.post_attention_layernorm = STARCODER2_NORMALIZATION_CLASSES[
+            config.norm_type
+        ].load(
+            prefix=f"{prefix}.post_attention_layernorm",
+            weights=weights,
+            eps=config.norm_epsilon,
+        )
+
+    def forward(
+        self,
+        hidden_states,
+        residual,
+        cos,
+        sin,
+        cu_seqlen_prefill,
+        kv_cache,
+        block_tables,
+        slots,
+        input_lengths,
+        max_s,
+        prefill_cache_indices,
+    ):
+        normed_hidden_states, res = self.input_layernorm(hidden_states, residual)
+
+        # Self Attention
+        attn_output = self.self_attn(
+            normed_hidden_states,
+            cos,
+            sin,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+            prefill_cache_indices,
+        )
+
+        # post-attention norm (class chosen by config.norm_type); its second output is returned as the residual
+        normed_attn_res_output, attn_res = self.post_attention_layernorm(
+            attn_output, res
+        )
+
+        mlp_output = self.mlp(normed_attn_res_output)
+
+        return mlp_output, attn_res  # (hidden states, residual) pair
+
+
+class Starcoder2Model(torch.nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+
+        process_group = weights.process_group
+        self.tp_rank = process_group.rank()
+        self.tp_world_size = process_group.size()
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix="model.embed_tokens", weights=weights
+        )
+        self.layers = nn.ModuleList(
+            [
+                Starcoder2Layer(
+                    layer_id,
+                    config,
+                    weights,
+                )
+                for layer_id in range(config.num_hidden_layers)
+            ]
+        )
+        self.norm = STARCODER2_NORMALIZATION_CLASSES[config.norm_type].load(
+            prefix="model.norm", weights=weights, eps=config.norm_epsilon
+        )
+
+        self.gradient_checkpointing = False  # not read anywhere in this class
+
+        self.head_size = self.layers[0].self_attn.head_size  # geometry copied from layer 0
+        self.num_heads = self.layers[0].self_attn.num_heads
+        self.num_key_value_heads = self.layers[0].self_attn.num_key_value_heads
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        true_max_s: int,
+        prefill_cache_indices: Optional[torch.Tensor],
+    ) -> torch.Tensor:
+        hidden_states = self.embed_tokens(input_ids)
+
+        # Get rotary cos and sin for this forward
+        # Computed once here so that each layer does not have to index them again
+        cos, sin = self.layers[0].self_attn.rotary_emb.get_cos_sin(
+            position_ids, true_max_s, hidden_states.dtype
+        )
+
+        residual = None  # the first layer receives no residual
+        for i, layer in enumerate(self.layers):
+            hidden_states, residual = layer(
+                hidden_states,
+                residual,
+                cos,
+                sin,
+                cu_seqlen_prefill,
+                kv_cache[i],
+                block_tables,
+                slots,
+                input_lengths,
+                max_s,
+                prefill_cache_indices,
+            )
+
+        hidden_states, _ = self.norm(hidden_states, residual)
+
+        return hidden_states
+
+
+class FlashStarcoder2ForCausalLM(torch.nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+
+        self.model = Starcoder2Model(config, weights)
+        try:
+            self.lm_head = SpeculativeHead.load(
+                config,
+                prefix="lm_head",
+                weights=weights,
+            )
+        except RuntimeError:  # fall back to loading the head from the embedding weights
+            self.lm_head = SpeculativeHead.load(
+                config,
+                prefix="model.embed_tokens",
+                weights=weights,
+            )
+
+        self.max_past = config.sliding_window
+        self.max_past_tensor = (  # tensor copy of the window, used as the clamp bound in forward
+            torch.tensor(config.sliding_window, device=weights.device)
+            if self.max_past is not None
+            else None
+        )
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        prefill_cache_indices: Optional[torch.Tensor],
+        lm_head_indices: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        true_max_s = max_s  # same value as max_s here; both are forwarded to the model
+        if prefill_cache_indices is not None:
+            # Slots also need to be sliced, as they have the same size as the whole kv tensor
+            slots = slots[prefill_cache_indices]
+        elif self.max_past is not None:
+            # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
+            # kernel requires the true values
+            input_lengths = torch.clamp(input_lengths, max=self.max_past_tensor)
+
+        hidden_states = self.model(
+            input_ids,
+            position_ids,
+            cu_seqlen_prefill,
+            kv_cache,
+            block_tables,
+            slots,
+            input_lengths,
+            max_s,
+            true_max_s,
+            prefill_cache_indices,
+        )
+        if lm_head_indices is not None:
+            hidden_states = hidden_states[lm_head_indices]
+        logits = self.lm_head(hidden_states)  # NOTE(review): SpeculativeHead may return a (logits, speculative_logits) pair — confirm
+        return logits
diff --git a/server/text_generation_server/models/custom_modeling/idefics_config.py b/server/text_generation_server/models/custom_modeling/idefics_config.py
index 0bdb2e3d..a5565819 100644
--- a/server/text_generation_server/models/custom_modeling/idefics_config.py
+++ b/server/text_generation_server/models/custom_modeling/idefics_config.py
@@ -66,6 +66,7 @@ class IdeficsVisionConfig(PretrainedConfig):
         initializer_range (`float`, *optional*, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
     """
+
     model_type = "idefics"
     attribute_map = {
         "hidden_size": "embed_dim",
@@ -125,6 +126,7 @@ class IdeficsPerceiverConfig(PretrainedConfig):
         qk_layer_norms_perceiver (`bool`, *optional*, defaults to `False`):
             Whether or not to use qk layer norms in perceiver
     """
+
     model_type = "idefics"
 
     def __init__(
@@ -219,6 +221,7 @@ class IdeficsConfig(PretrainedConfig):
     >>> # Accessing the model configuration
     >>> configuration = model.config
     ```"""
+
     model_type = "idefics"
     is_composition = True
 
diff --git a/server/text_generation_server/models/custom_modeling/idefics_image_processing.py b/server/text_generation_server/models/custom_modeling/idefics_image_processing.py
index 4760ae6f..e323d365 100644
--- a/server/text_generation_server/models/custom_modeling/idefics_image_processing.py
+++ b/server/text_generation_server/models/custom_modeling/idefics_image_processing.py
@@ -198,7 +198,9 @@ class IdeficsImageProcessor(BaseImageProcessor):
             image = image_url_or_urls
 
             if image.startswith("http://") or image.startswith("https://"):
-                response = requests.get(image_url_or_urls, stream=True, headers=headers, timeout=(1, 5))
+                response = requests.get(
+                    image_url_or_urls, stream=True, headers=headers, timeout=(1, 5)
+                )
                 response.raise_for_status()
                 content = response.content
             elif image.startswith("data:"):
@@ -213,7 +215,7 @@ class IdeficsImageProcessor(BaseImageProcessor):
                 image = Image.open(BytesIO(content))
                 # image.verify()
             except Exception:
-                raise ValueError(f"Could not load image from url {image_url_or_urls}")    
+                raise ValueError(f"Could not load image from url {image_url_or_urls}")
             return image
         else:
             raise ValueError(
diff --git a/server/text_generation_server/models/custom_modeling/idefics_modeling.py b/server/text_generation_server/models/custom_modeling/idefics_modeling.py
index 946f7683..ee4cdb08 100644
--- a/server/text_generation_server/models/custom_modeling/idefics_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/idefics_modeling.py
@@ -51,7 +51,7 @@ from text_generation_server.utils.layers import (
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
     TensorParallelRowLinear,
-    TensorParallelHead,
+    SpeculativeHead,
     PositionRotaryEmbedding,
     FastLinear,
 )
@@ -62,6 +62,7 @@ if IS_CUDA_SYSTEM:
 elif IS_ROCM_SYSTEM:
     from vllm import layernorm_ops
 
+
 @dataclass
 class BaseModelOutputWithPastImage(BaseModelOutputWithPast):
     image_hidden_states: Optional[torch.FloatTensor] = None
@@ -122,10 +123,10 @@ def expand_inputs_for_generation(
             raise ValueError(
                 "If `is_encoder_decoder` is True, make sure that `encoder_outputs` is defined."
             )
-        encoder_outputs[
-            "last_hidden_state"
-        ] = encoder_outputs.last_hidden_state.index_select(
-            0, expanded_return_idx.to(encoder_outputs.last_hidden_state.device)
+        encoder_outputs["last_hidden_state"] = (
+            encoder_outputs.last_hidden_state.index_select(
+                0, expanded_return_idx.to(encoder_outputs.last_hidden_state.device)
+            )
         )
         model_kwargs["encoder_outputs"] = encoder_outputs
     return input_ids, model_kwargs
@@ -271,9 +272,7 @@ class IdeficsDecoupledTensorParallelLinear(nn.Module):
         weights,
     ) -> None:
         super().__init__()
-        self.fc = TensorParallelHead.load(
-            config=config, prefix="lm_head", weights=weights
-        )
+        self.fc = SpeculativeHead.load(config=config, prefix="lm_head", weights=weights)
         self.additional_fc = FastLinear.load(
             config=config,
             prefix="lm_head.additional_fc",
@@ -282,11 +281,11 @@ class IdeficsDecoupledTensorParallelLinear(nn.Module):
         )
 
     def forward(self, input: torch.Tensor) -> torch.Tensor:
-        output = self.fc(input)
+        output, speculative_logits = self.fc(input)
         additional_features = self.additional_fc(input)
         output = torch.cat((output, additional_features), -1)
 
-        return output
+        return output, speculative_logits
 
     def extra_repr(self) -> str:
         """Overwriting `nn.Linear.extra_repr` to include new parameters."""
@@ -431,7 +430,9 @@ class IdeficsRMSNorm(nn.Module):
 
             return out
         else:
-            raise ValueError("Your system seem to be not supported. Please check your install or open an issue at https://github.com/huggingface/text-generation-inference/issues with a clear reproduction.")
+            raise ValueError(
+                "Your system seem to be not supported. Please check your install or open an issue at https://github.com/huggingface/text-generation-inference/issues with a clear reproduction."
+            )
 
 
 # this was adapted from LlamaMLP
@@ -613,8 +614,13 @@ class IdeficsAttention(nn.Module):
 
             query_shape = query_states.shape
             key_shape = key_states.shape
-            self.rotary_emb(query_states.view(-1, *query_shape[2:]), key_states.reshape(-1, *key_shape[2:]), cos, sin)
-            
+            self.rotary_emb(
+                query_states.view(-1, *query_shape[2:]),
+                key_states.reshape(-1, *key_shape[2:]),
+                cos,
+                sin,
+            )
+
             query_states = query_states.view(query_shape)
             key_states = key_states.view(key_shape)
 
@@ -1495,17 +1501,20 @@ class IdeficsForVisionText2Text(IdeficsPreTrainedModel):
         )
 
         hidden_states = outputs[0]
-        logits = self.lm_head(hidden_states)
+        logits, speculative_logits = self.lm_head(hidden_states)
 
         loss = None
 
-        return CausalLMOutputWithPastImage(
-            loss=loss,
-            logits=logits,
-            past_key_values=outputs.past_key_values,
-            hidden_states=outputs.hidden_states,
-            attentions=outputs.attentions,
-            image_hidden_states=outputs.image_hidden_states,
+        return (
+            CausalLMOutputWithPastImage(
+                loss=loss,
+                logits=logits,
+                past_key_values=outputs.past_key_values,
+                hidden_states=outputs.hidden_states,
+                attentions=outputs.attentions,
+                image_hidden_states=outputs.image_hidden_states,
+            ),
+            speculative_logits,
         )
 
     def prepare_inputs_for_generation(self, input_ids, past=None, **kwargs):
diff --git a/server/text_generation_server/models/custom_modeling/idefics_processing.py b/server/text_generation_server/models/custom_modeling/idefics_processing.py
index 98e43a27..7bba6977 100644
--- a/server/text_generation_server/models/custom_modeling/idefics_processing.py
+++ b/server/text_generation_server/models/custom_modeling/idefics_processing.py
@@ -112,6 +112,7 @@ def is_url(string):
     result = urlparse(string)
     return all([result.scheme, result.netloc])
 
+
 def is_image(string):
     """Checks if the passed string contains a valid url and nothing else. e.g. if space is included it's immediately
     invalidated the url"""
@@ -132,6 +133,7 @@ class IdeficsProcessor(ProcessorMixin):
             An instance of [`LlamaTokenizerFast`]. The tokenizer is a required input.
         image_size (`int`, *optional*, defaults to 224): Image size (assuming a square image)
     """
+
     attributes = ["image_processor", "tokenizer"]
     image_processor_class = "IdeficsImageProcessor"
     tokenizer_class = "LlamaTokenizerFast"
@@ -344,7 +346,6 @@ class IdeficsProcessor(ProcessorMixin):
 
             image_objects = self.image_processor(image_objects, transform=transform)
 
-
             text_encoding = self.tokenizer(
                 text=full_text,
                 add_special_tokens=False,
diff --git a/server/text_generation_server/models/custom_modeling/llava_next.py b/server/text_generation_server/models/custom_modeling/llava_next.py
new file mode 100644
index 00000000..ed21a52b
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/llava_next.py
@@ -0,0 +1,302 @@
+# coding=utf-8
+# Copyright 2024 the HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" PyTorch Llava-NeXT model."""
+
+from typing import List, Optional, Tuple, Union
+
+import torch
+import torch.utils.checkpoint
+from torch import nn
+
+from transformers.activations import ACT2FN
+from transformers.image_processing_utils import select_best_resolution
+
+from text_generation_server.utils.layers import (
+    TensorParallelColumnLinear,
+    TensorParallelRowLinear,
+)
+
+
+def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size):
+    """
+    Calculate the shape of the image patch grid after the preprocessing for images of any resolution.
+
+    Args:
+        image_size (`tuple`):
+            The size of the input image in the format (height, width), as `select_best_resolution` expects.
+        grid_pinpoints (`List`):
+            A list containing possible resolutions. Each item in the list should be a tuple or list
+            of the form `(height, width)`.
+        patch_size (`int`):
+            The size of each image patch.
+
+    Returns:
+        tuple: The shape of the image patch grid in the format (height, width).
+    """
+    if not isinstance(grid_pinpoints, list):
+        raise ValueError("grid_pinpoints should be a list of tuples or lists")
+
+    height, width = select_best_resolution(image_size, grid_pinpoints)
+    return height // patch_size, width // patch_size
+
+
+def unpad_image(tensor, original_size):
+    """
+    Strip the padding from an image tensor that was resized and then padded.
+
+    The crop is symmetric: the same number of rows (or columns) is dropped from
+    both ends of the cropped axis, while the other axes are left untouched.
+
+    Args:
+        tensor (`torch.Tensor`):
+            The image tensor, assumed to be of shape (num_channels, height, width).
+        original_size (`tuple`):
+            The original size of the image (height, width).
+
+    Returns:
+        `torch.Tensor`: The unpadded image tensor.
+    """
+    orig_h, orig_w = original_size
+    cur_h, cur_w = tensor.shape[1:]
+
+    if orig_w / orig_h > cur_w / cur_h:
+        # The original is relatively wider than the current tensor: the width
+        # is taken as the fitted side, so the crop happens along the height axis.
+        kept_h = int(orig_h * (cur_w / orig_w))
+        margin = (cur_h - kept_h) // 2
+        return tensor[:, margin : cur_h - margin, :]
+
+    # Otherwise the height is taken as the fitted side and the crop happens
+    # along the width axis instead.
+    kept_w = int(orig_w * (cur_h / orig_h))
+    margin = (cur_w - kept_w) // 2
+    return tensor[:, :, margin : cur_w - margin]
+
+
+# Copied from transformers.models.llava.modeling_llava.LlavaMultiModalProjector with Llava->LlavaNext
+class LlavaNextMultiModalProjector(nn.Module):
+    """Two tensor-parallel linear layers with an activation in between."""
+
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        shared = dict(config=config, weights=weights, bias=True)
+        # Column-parallel layer first, row-parallel layer second, both with bias.
+        self.linear_1 = TensorParallelColumnLinear.load(
+            prefix=f"{prefix}.linear_1", **shared
+        )
+        self.act = ACT2FN[config.projector_hidden_act]
+        self.linear_2 = TensorParallelRowLinear.load(
+            prefix=f"{prefix}.linear_2", **shared
+        )
+
+    def forward(self, image_features):
+        return self.linear_2(self.act(self.linear_1(image_features)))
+
+
+def load_vision_model(prefix, config, weights):
+    """Build the vision tower for `config.model_type`; only CLIP is handled."""
+    if config.model_type != "clip_vision_model":
+        raise RuntimeError(f"Unsupported model type {config.model_type}")
+
+    # Deferred import: only pulled in once a CLIP config has been confirmed.
+    from text_generation_server.models.custom_modeling.clip import (
+        CLIPVisionTransformer,
+    )
+    tower_prefix = f"{prefix}.vision_model"
+    return CLIPVisionTransformer(prefix=tower_prefix, config=config, weights=weights)
+
+
+def load_text_model(prefix, config, weights):
+    """Build the language model for `config.model_type` (llama or mistral)."""
+    if config.model_type not in ("llama", "mistral"):
+        raise RuntimeError(f"Unsupported model type {config.model_type}")
+
+    # Imports are deferred so that only the selected backend module is loaded.
+    if config.model_type == "llama":
+        from text_generation_server.models.custom_modeling.flash_llama_modeling import (
+            FlashLlamaForCausalLM as causal_lm_cls,
+        )
+    else:
+        from text_generation_server.models.custom_modeling.flash_mistral_modeling import (
+            FlashMistralForCausalLM as causal_lm_cls,
+        )
+    return causal_lm_cls(prefix, config, weights)
+
+
+class LlavaNextForConditionalGeneration(nn.Module):  # vision tower + projector + language model
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        config.vision_config.quantize = config.quantize
+        vision_config = config.vision_config
+        # Rather than running every layer and picking hidden_states[vision_feature_layer],
+        # only build the layers up to the selected one (and don't pool).
+        if config.vision_feature_layer < 0:  # negative index: shrink the existing layer count
+            vision_config.num_hidden_layers += config.vision_feature_layer + 1
+        else:
+            vision_config.num_hidden_layers = config.vision_feature_layer + 1
+        self.vision_tower = load_vision_model(
+            prefix="vision_tower" if not prefix else f"{prefix}.vision_tower",
+            config=config.vision_config,
+            weights=weights,
+        )
+        # NOTE(review): the projector and `image_newline` below ignore `prefix`, unlike both sub-models — confirm.
+        self.multi_modal_projector = LlavaNextMultiModalProjector(
+            prefix="multi_modal_projector", config=config, weights=weights
+        )
+
+        self.image_newline = weights.get_tensor("image_newline")
+
+        self.vocab_size = config.text_config.vocab_size
+        self.config = config
+        config.text_config.quantize = config.quantize
+        config.text_config.use_medusa = config.use_medusa
+        self.language_model = load_text_model(
+            prefix="language_model" if not prefix else f"{prefix}.language_model",
+            config=config.text_config,
+            weights=weights,
+        )
+        self.pad_token_id = (
+            config.pad_token_id if config.pad_token_id is not None else -1
+        )
+
+    def _merge_input_ids_with_image_features(
+        self,
+        input_ids: torch.Tensor,
+        inputs_embeds: torch.Tensor,
+        image_features: torch.Tensor,
+    ):
+        """Overwrite, in place, the rows of `inputs_embeds` at image-token positions with `image_features`."""
+        mask = input_ids == self.config.image_token_index
+        # Needs exactly as many image-token positions as flattened feature rows.
+        inputs_embeds[mask] = image_features.view(-1, image_features.shape[-1])
+        return inputs_embeds
+
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        position_ids: torch.Tensor,
+        cu_seqlen_prefill: Optional[torch.Tensor],
+        kv_cache: List[Tuple[torch.Tensor, torch.Tensor]],
+        block_tables: torch.Tensor,
+        slots: torch.Tensor,
+        input_lengths: torch.Tensor,
+        max_s: int,
+        prefill_cache_indices: Optional[torch.Tensor],
+        lm_head_indices: Optional[torch.Tensor] = None,
+        pixel_values: torch.FloatTensor = None,
+        image_sizes: Optional[torch.LongTensor] = None,
+    ):
+        inputs_embeds = self.language_model.embed_tokens(input_ids)
+        if pixel_values is not None and len(pixel_values) > 0:
+            # num_special_image_tokens = (input_ids == self.config.image_token_index).sum()
+            # assert num_special_image_tokens == len(pixel_values), f"Received {num_special_image_tokens} for {len(pixel_values)} images, this is invalid"
+            # 1. Input embeddings were already computed above; this branch only runs when images are present
+
+            # 2. Fold (num_images, num_patches) into one batch axis and run the vision tower
+            num_images, num_patches, channels, height, width = pixel_values.shape
+            pixel_values = pixel_values.view(
+                num_images * num_patches, channels, height, width
+            )
+            image_features = self.vision_tower(pixel_values)
+
+            # selected_image_feature = image_features.hidden_states[self.config.vision_feature_layer]
+            # Already done within the clip model
+            selected_image_feature = image_features.last_hidden_state
+
+            if self.config.vision_feature_select_strategy == "default":
+                selected_image_feature = selected_image_feature[:, 1:]  # drop the first token of each sequence
+            elif self.config.vision_feature_select_strategy == "full":
+                selected_image_feature = selected_image_feature  # keep every token
+            else:
+                raise RuntimeError(
+                    f"Strategy `{self.config.vision_feature_select_strategy}` is not supported/valid."
+                )
+
+            image_features = self.multi_modal_projector(selected_image_feature)
+
+            # split up image_features for each of the individual images
+            # hence we get a list of image_features, each of shape (5, num_patches, hidden_size)
+            # if we assume each image has 5 image features (base image + 4 patches)
+            split_sizes = [num_patches] * num_images
+            image_features = torch.split(image_features, split_sizes, dim=0)
+
+            # NOTE we only support multimodal_patch_merge_type == "spatial_unpad"
+            height = width = (
+                self.config.vision_config.image_size
+                // self.config.vision_config.patch_size
+            )
+
+            new_image_features = []
+            for image_idx, image_feature in enumerate(image_features):
+                if image_feature.shape[0] > 1:
+                    base_image_feature = image_feature[0]
+                    image_feature = image_feature[1:]
+
+                    if height * width != base_image_feature.shape[0]:
+                        raise ValueError(
+                            "The number of patches is not consistent with the image size."
+                        )
+                    num_patch_height, num_patch_width = get_anyres_image_grid_shape(
+                        image_sizes[image_idx],
+                        self.config.image_grid_pinpoints,
+                        self.config.vision_config.image_size,  # fills the `patch_size` parameter
+                    )
+                    image_feature = image_feature.view(
+                        num_patch_height, num_patch_width, height, width, -1
+                    )
+                    image_feature = image_feature.permute(4, 0, 2, 1, 3).contiguous()  # (hidden, grid_h, h, grid_w, w)
+                    image_feature = image_feature.flatten(1, 2).flatten(2, 3)  # (hidden, grid_h * h, grid_w * w)
+                    image_feature = unpad_image(image_feature, image_sizes[image_idx])
+                    image_feature = torch.cat(
+                        (
+                            image_feature,
+                            self.image_newline[:, None, None].expand(  # one extra column per row
+                                *image_feature.shape[:-1], 1
+                            ),
+                        ),
+                        dim=-1,
+                    )
+                    image_feature = image_feature.flatten(1, 2).transpose(0, 1)  # (tokens, hidden)
+                    image_feature = torch.cat(
+                        (base_image_feature, image_feature), dim=0
+                    )
+                else:
+                    image_feature = image_feature[0]
+                    image_feature = torch.cat(
+                        (image_feature, self.image_newline[None]), dim=0
+                    )
+                new_image_features.append(image_feature)
+            image_features = torch.stack(new_image_features, dim=0)  # NOTE(review): needs equal token counts per image
+
+            inputs_embeds = self._merge_input_ids_with_image_features(
+                input_ids, inputs_embeds, image_features
+            )
+
+        hidden_states = self.language_model.model(
+            inputs_embeds=inputs_embeds,
+            position_ids=position_ids,
+            cu_seqlen_prefill=cu_seqlen_prefill,
+            kv_cache=kv_cache,
+            block_tables=block_tables,
+            slots=slots,
+            input_lengths=input_lengths,
+            max_s=max_s,
+            true_max_s=max_s,
+            prefill_cache_indices=None,  # NOTE(review): the `prefill_cache_indices` argument is not forwarded — confirm
+        )
+        if lm_head_indices is not None:
+            hidden_states = hidden_states[lm_head_indices]
+        logits, speculative_logits = self.language_model.lm_head(hidden_states)
+        return logits, speculative_logits
diff --git a/server/text_generation_server/models/custom_modeling/mamba_modeling.py b/server/text_generation_server/models/custom_modeling/mamba_modeling.py
new file mode 100644
index 00000000..c58a617f
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/mamba_modeling.py
@@ -0,0 +1,232 @@
+import torch
+import torch.distributed
+
+from mamba_ssm.ops.triton.selective_state_update import selective_state_update
+from mamba_ssm.ops.selective_scan_interface import selective_scan_fn
+from torch import nn
+from typing import Optional, Tuple, Any
+from transformers.configuration_utils import PretrainedConfig
+import torch.nn.functional as F
+
+from text_generation_server.utils.layers import (
+    SpeculativeHead,
+    TensorParallelEmbedding,
+    FastRMSNorm,
+    FastLinear,
+)
+
+from einops import rearrange
+from causal_conv1d import causal_conv1d_fn, causal_conv1d_update
+import math
+from dataclasses import dataclass
+
+
+@dataclass
+class InferenceParams:
+    """Inference parameters that are passed to the main model in order
+    to efficiently calculate and store the context during inference."""
+
+    max_seqlen: int
+    max_batch_size: int
+    conv_states: torch.Tensor  # indexed by layer id; overwritten after every block
+    ssm_states: torch.Tensor  # indexed by layer id; overwritten after every block
+    seqlen_offset: int  # 0 selects the full-sequence path; advanced after each model call
+
+
+class MambaConfig(PretrainedConfig):
+    """Mamba hyper-parameters; `d_inner` and `dt_rank` are derived when the config is built."""
+    def __init__(
+        self,
+        vocab_size=50280,
+        d_model=768,
+        d_state=16,
+        n_layer=32,
+        layer_norm_epsilon=1e-5,
+        tie_word_embeddings=False,
+        pad_token_id=0,
+        bos_token_id=1,
+        eos_token_id=2,
+        expand=2,
+        dt_rank="auto",
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.n_layer = n_layer
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.d_model = d_model
+        self.d_inner = d_model * expand  # previously hard-coded to `d_model * 2`, ignoring `expand`
+        self.d_conv = 4  # fixed here; not exposed as a constructor argument
+        self.d_state = d_state
+        self.expand = expand
+        self.dt_rank = math.ceil(self.d_model / 16) if dt_rank == "auto" else dt_rank
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+
+class MambaBlock(nn.Module):  # one Mamba mixer layer: full-sequence forward() plus single-token step()
+    def __init__(self, prefix, config, weights, layer_id):
+        super().__init__()
+        self.layer_id = layer_id
+        self.in_proj = FastLinear.load(config, f"{prefix}.in_proj", weights, bias=False)
+        self.x_proj = FastLinear.load(config, f"{prefix}.x_proj", weights, bias=False)
+        self.dt_proj = FastLinear.load(config, f"{prefix}.dt_proj", weights, bias=True)
+        self.dt_proj_no_bias = FastLinear.load(  # NOTE(review): not referenced by forward()/step() below
+            config, f"{prefix}.dt_proj", weights, bias=False
+        )
+        self.out_proj = FastLinear.load(
+            config, f"{prefix}.out_proj", weights, bias=False
+        )
+        self.conv1d = FastLinear.load(config, f"{prefix}.conv1d", weights, bias=True)
+        self.negA = -torch.exp(weights.get_tensor(f"{prefix}.A_log").float())  # -exp(A_log), in float32
+        self.D = weights.get_tensor(f"{prefix}.D")
+        self.activation = "silu"
+        self.dt_rank = config.dt_rank
+        self.d_state = config.d_state
+        self.d_conv = config.d_conv
+        self.act = nn.SiLU()
+
+    # Once seqlen_offset > 0 the call is delegated to step(); otherwise the full-sequence scan below runs.
+    def forward(self, hidden_states: torch.Tensor, inference_params=None):  # NOTE(review): None would fail below
+        if inference_params.seqlen_offset > 0:
+            conv_state = inference_params.conv_states[self.layer_id]
+            ssm_state = inference_params.ssm_states[self.layer_id]
+            out, conv_state, ssm_state = self.step(hidden_states, conv_state, ssm_state)
+            return out, conv_state, ssm_state
+
+        _, seqlen, _ = hidden_states.shape
+        projected_states = self.in_proj(hidden_states).transpose(1, 2)
+        # assert projected_states.shape == [batch_size, 2 * dstate, seqlen], f"{projected_states.shape} [{batch_size}, {dstate}, {seqlen}]"
+        x, z = projected_states.chunk(2, dim=1)
+        conv_state = F.pad(x, (self.d_conv - seqlen, 0))  # left-pad (crop if negative) last dim to d_conv
+        x = causal_conv1d_fn(
+            x=x,
+            weight=self.conv1d.weight.squeeze(1),
+            bias=self.conv1d.bias,
+            activation=self.activation,
+        )
+
+        # We're careful here about the layout, to avoid extra transposes.
+        # We want dt to have d as the slowest moving dimension
+        # and L as the fastest moving dimension, since those are what the ssm_scan kernel expects.
+        x_dbl = self.x_proj(rearrange(x, "b d l -> (b l) d"))  # (bl d)
+        dt, B, C = torch.split(
+            x_dbl, [self.dt_rank, self.d_state, self.d_state], dim=-1
+        )
+        dt = self.dt_proj.weight @ dt.t()  # weight only; the bias is passed below as delta_bias
+        dt = rearrange(dt, "d (b l) -> b d l", l=seqlen)
+        B = rearrange(B, "(b l) dstate -> b dstate l", l=seqlen).contiguous()
+        C = rearrange(C, "(b l) dstate -> b dstate l", l=seqlen).contiguous()
+        y, last_state = selective_scan_fn(
+            x,
+            dt,
+            self.negA,
+            B,
+            C,
+            self.D.float(),
+            z=z,
+            delta_bias=self.dt_proj.bias.float(),
+            delta_softplus=True,
+            return_last_state=True,
+        )
+        y = rearrange(y, "b d l -> b l d")
+        attn_outputs = self.out_proj(y)
+        return attn_outputs, conv_state, last_state
+
+    def step(self, hidden_states, conv_state, ssm_state):
+        xz = self.in_proj(hidden_states.squeeze(1))
+        x, z = xz.chunk(2, dim=-1)  # (B D)
+        x = causal_conv1d_update(
+            x,
+            conv_state,
+            self.conv1d.weight.squeeze(1),
+            self.conv1d.bias,
+            self.activation,
+        )
+        x_db = self.x_proj(x)  # (B dt_rank+2*d_state)
+        dt, B, C = torch.split(x_db, [self.dt_rank, self.d_state, self.d_state], dim=-1)
+        dt = F.linear(dt, self.dt_proj.weight)  # weight only; the bias is passed below as dt_bias
+        A = self.negA
+        y = selective_state_update(
+            ssm_state,
+            x,
+            dt,
+            A,
+            B,
+            C,
+            self.D,
+            z=z,
+            dt_bias=self.dt_proj.bias,
+            dt_softplus=True,
+        )
+        out = self.out_proj(y)
+        return out.unsqueeze(1), conv_state.clone(), ssm_state.clone()  # presumably updated in place by the kernels — confirm
+
+
+class ResidualBlock(nn.Module):  # RMS norm followed by a MambaBlock; the residual stream is returned separately
+    def __init__(self, prefix, config, weights, layer_id):
+        super().__init__()
+        self.mamba_block = MambaBlock(
+            prefix=f"{prefix}.mixer", config=config, weights=weights, layer_id=layer_id
+        )
+        self.layer_norm = FastRMSNorm.load(
+            prefix=f"{prefix}.norm", weights=weights, eps=config.layer_norm_epsilon
+        )
+
+    def forward(
+        self,
+        hidden_states: torch.Tensor,
+        residual: Optional[torch.Tensor] = None,
+        inference_params: Optional[Any] = None,
+    ):
+        residual = (hidden_states + residual) if residual is not None else hidden_states  # no residual yet: adopt the input
+        shape = residual.shape
+        hidden_states, _ = self.layer_norm(residual.view(-1, shape[-1]))  # norm on a flattened (-1, hidden) view; 2nd output unused
+        hidden_states, conv_state, last_ssm_state = self.mamba_block(
+            hidden_states.view(*shape), inference_params  # original shape restored before the mixer
+        )
+        return hidden_states, residual, conv_state, last_ssm_state
+
+
+class MambaModel(nn.Module):  # embedding -> n_layer ResidualBlocks -> final norm -> (speculative) LM head
+    def __init__(self, config, weights):
+        super().__init__()
+        prefix = "backbone"
+        self.embed_tokens = TensorParallelEmbedding(f"{prefix}.embedding", weights)
+        self.blocks = nn.ModuleList(
+            [
+                ResidualBlock(f"{prefix}.layers.{i}", config, weights, layer_id=i)
+                for i in range(config.n_layer)
+            ]
+        )
+        self.norm_f = FastRMSNorm.load(
+            f"{prefix}.norm_f", weights, eps=config.layer_norm_epsilon
+        )
+        self.lm_head = SpeculativeHead.load(config, f"{prefix}.embedding", weights)  # same weight prefix as embed_tokens
+        self.config = config
+
+    def forward(  # NOTE(review): inference_params defaults to None but is dereferenced unconditionally
+        self, input_ids: torch.Tensor, inference_params=None, residual=None
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+        hidden_states = self.embed_tokens(input_ids)
+        for i, block in enumerate(self.blocks):
+            hidden_states, residual, conv_state, ssm_state = block(
+                hidden_states, residual, inference_params
+            )
+            inference_params.conv_states[i].copy_(conv_state)  # store this layer's states in the shared buffers
+            inference_params.ssm_states[i].copy_(ssm_state)
+
+        hidden_states = (
+            hidden_states + residual if residual is not None else hidden_states
+        )
+        hidden_states, _ = self.norm_f(hidden_states.view(-1, hidden_states.size(-1)))
+        hidden_states = hidden_states.view(residual.shape)  # NOTE(review): residual is None-checked above but dereferenced here
+        logits, speculative_logits = self.lm_head(hidden_states)
+
+        # update the offset for the next inference using these params
+        inference_params.seqlen_offset += input_ids.size(1)
+        return logits, speculative_logits
diff --git a/server/text_generation_server/models/custom_modeling/mpt_modeling.py b/server/text_generation_server/models/custom_modeling/mpt_modeling.py
index 5ccf796d..9b0f8b92 100644
--- a/server/text_generation_server/models/custom_modeling/mpt_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/mpt_modeling.py
@@ -2,6 +2,7 @@
 
 Inspired by https://github.com/karpathy/minGPT/blob/master/mingpt/model.py
 """
+
 import math
 import os
 import warnings
@@ -20,7 +21,7 @@ from text_generation_server.utils.layers import (
     TensorParallelEmbedding,
     TensorParallelColumnLinear,
     TensorParallelRowLinear,
-    TensorParallelHead,
+    SpeculativeHead,
     get_linear,
 )
 
@@ -28,7 +29,6 @@ EPS = 1e-5
 
 
 def load_col(config, prefix, weights, bias):
-    assert bias == False, NotImplementedError
     assert config.quantize != "gptq", NotImplementedError
     slice_ = weights._get_slice(f"{prefix}.weight")
     rank = weights.process_group.rank()
@@ -45,7 +45,36 @@ def load_col(config, prefix, weights, bias):
     if weight.dtype != torch.int32:
         weight = weight.to(dtype=weights.dtype)
     weight = weight.to(device=weights.device)
-    bias = None
+
+    if bias:
+        bias_slice_ = weights._get_slice(f"{prefix}.bias")
+        bias_rank = weights.process_group.rank()
+        bias_size = weights.process_group.size()
+
+        bias_h = bias_slice_.get_shape()
+        bias_h = bias_h[0]
+        bias_block_size = bias_h // bias_size
+
+        bias_q_part = bias_slice_[
+            bias_rank * bias_block_size : (bias_rank + 1) * bias_block_size
+        ]
+        bias_k_part = bias_slice_[
+            bias_h
+            + bias_rank * bias_block_size : bias_h
+            + (bias_rank + 1) * bias_block_size
+        ]
+        bias_v_part = bias_slice_[
+            2 * bias_h
+            + bias_rank * bias_block_size : 2 * bias_h
+            + (bias_rank + 1) * bias_block_size
+        ]
+
+        bias = torch.cat([bias_q_part, bias_k_part, bias_v_part], dim=0)
+        if bias.dtype != torch.int32:
+            bias = bias.to(dtype=weights.dtype)
+        bias = bias.to(device=weights.device)
+    else:
+        bias = None
     linear = get_linear(weight, bias, config.quantize)
     return TensorParallelColumnLinear(linear)
 
@@ -330,7 +359,16 @@ class MultiheadAttention(nn.Module):
             config, prefix=f"{prefix}.Wqkv", weights=weights, bias=not config.no_bias
         )
         if self.qk_ln:
-            raise NotImplementedError("qk_ln is not supported")
+            bias = not config.no_bias
+            hidden_size = config.d_model
+            head_dim = hidden_size // self.n_heads
+
+            self.q_ln = LPLayerNorm(
+                d_model, bias=bias, prefix=f"{prefix}.q_ln", weights=weights
+            )
+            self.k_ln = LPLayerNorm(
+                self.n_heads * head_dim, prefix=f"{prefix}.k_ln", weights=weights
+            )
         if self.attn_impl == "flash":
             self.attn_fn = flash_attn_fn
         elif self.attn_impl == "triton":
@@ -581,12 +619,20 @@ class MPTBlock(nn.Module):
                 f"""Not implemented attn {config.attn_config["attn_type"]}"""
             )
         resid_pdrop = config.resid_pdrop
-        self.norm_1 = nn.LayerNorm.load_no_bias(
-            prefix=f"{prefix}.norm_1", weights=weights, eps=EPS
-        )
-        self.norm_2 = nn.LayerNorm.load_no_bias(
-            prefix=f"{prefix}.norm_2", weights=weights, eps=EPS
-        )
+        if config.no_bias:
+            self.norm_1 = nn.LayerNorm.load_no_bias(
+                prefix=f"{prefix}.norm_1", weights=weights, eps=EPS
+            )
+            self.norm_2 = nn.LayerNorm.load_no_bias(
+                prefix=f"{prefix}.norm_2", weights=weights, eps=EPS
+            )
+        else:
+            self.norm_1 = nn.LayerNorm.load(
+                prefix=f"{prefix}.norm_1", weights=weights, eps=EPS
+            )
+            self.norm_2 = nn.LayerNorm.load(
+                prefix=f"{prefix}.norm_2", weights=weights, eps=EPS
+            )
         self.attn = MultiheadAttention(config, prefix=f"{prefix}.attn", weights=weights)
         self.ffn = MPTMLP(config, prefix=f"{prefix}.ffn", weights=weights)
         self.resid_attn_dropout = nn.Dropout(resid_pdrop)
@@ -635,6 +681,9 @@ class LPLayerNorm(torch.nn.LayerNorm):
         elementwise_affine=True,
         device=None,
         dtype=None,
+        bias: Optional[bool] = True,
+        prefix=None,
+        weights=None,
     ):
         super().__init__(
             normalized_shape=normalized_shape,
@@ -642,7 +691,13 @@ class LPLayerNorm(torch.nn.LayerNorm):
             elementwise_affine=elementwise_affine,
             device=device,
             dtype=dtype,
+            bias=bias,
         )
+        if weights is not None:
+            self.weight = nn.Parameter(weights.get_sharded(f"{prefix}.weight", dim=0))
+            if bias:
+                self.bias = nn.Parameter(weights.get_sharded(f"{prefix}.bias", dim=0))
+            self.normalized_shape = self.weight.shape
 
     def forward(self, x):
         module_device = x.device
@@ -755,20 +810,23 @@ class MPTModel(MPTPreTrainedModel):
             )
 
         self.wte = TensorParallelEmbedding("transformer.wte", weights)
+
         if not self.alibi:
-            # self.wpe = torch.nn.Embedding(
-            #     config.max_seq_len, config.d_model, device=config.init_device
-            # )
-            raise RuntimeError("no alibi no supported")
+            self.wpe = TensorParallelEmbedding("transformer.wpe", weights)
         self.blocks = nn.ModuleList(
             [
                 MPTBlock(config, prefix=f"transformer.blocks.{i}", weights=weights)
                 for i in range(config.n_layers)
             ]
         )
-        self.norm_f = nn.LayerNorm.load_no_bias(
-            prefix="transformer.norm_f", weights=weights, eps=EPS
-        )
+        if config.no_bias:
+            self.norm_f = nn.LayerNorm.load_no_bias(
+                prefix="transformer.norm_f", weights=weights, eps=EPS
+            )
+        else:
+            self.norm_f = nn.LayerNorm.load(
+                prefix="transformer.norm_f", weights=weights, eps=EPS
+            )
         self.is_causal = not self.prefix_lm
         self._attn_bias_initialized = False
         self.attn_bias = None
@@ -787,8 +845,9 @@ class MPTModel(MPTPreTrainedModel):
                     if config.verbose:
                         warnings.warn(f"Removing bias ({module.bias}) from {module}.")
                     module.register_parameter("bias", None)
-        if config.verbose and config.verbose > 2:
-            print(self)
+        if hasattr(self.config, "verbose"):
+            if config.verbose and config.verbose > 2:
+                print(self)
         if "verbose" not in self.config.init_config:
             self.config.init_config["verbose"] = self.config.verbose
         if self.config.init_config["verbose"] > 1:
@@ -1031,7 +1090,7 @@ class MPTForCausalLM(MPTPreTrainedModel):
         if not config.tie_word_embeddings:
             raise ValueError("MPTForCausalLM only supports tied word embeddings")
         self.transformer = MPTModel(config, weights)
-        self.lm_head = TensorParallelHead.load(
+        self.lm_head = SpeculativeHead.load(
             config, prefix="transformer.wte", weights=weights
         )
         self.logit_scale = None
@@ -1074,7 +1133,7 @@ class MPTForCausalLM(MPTPreTrainedModel):
             output_hidden_states=output_hidden_states,
             use_cache=use_cache,
         )
-        logits = self.lm_head(outputs.last_hidden_state)
+        logits, speculative_logits = self.lm_head(outputs.last_hidden_state)
         if self.logit_scale is not None:
             if self.logit_scale == 0:
                 warnings.warn(
@@ -1088,12 +1147,15 @@ class MPTForCausalLM(MPTPreTrainedModel):
             loss = F.cross_entropy(
                 logits.view(-1, logits.size(-1)), labels.to(logits.device).view(-1)
             )
-        return CausalLMOutputWithPast(
-            loss=loss,
-            logits=logits,
-            past_key_values=outputs.past_key_values,
-            hidden_states=outputs.hidden_states,
-            attentions=outputs.attentions,
+        return (
+            CausalLMOutputWithPast(
+                loss=loss,
+                logits=logits,
+                past_key_values=outputs.past_key_values,
+                hidden_states=outputs.hidden_states,
+                attentions=outputs.attentions,
+            ),
+            speculative_logits,
         )
 
     def prepare_inputs_for_generation(
diff --git a/server/text_generation_server/models/custom_modeling/neox_modeling.py b/server/text_generation_server/models/custom_modeling/neox_modeling.py
index dbcefbae..1b060060 100644
--- a/server/text_generation_server/models/custom_modeling/neox_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/neox_modeling.py
@@ -44,7 +44,7 @@ from text_generation_server.utils.layers import (
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
     TensorParallelRowLinear,
-    TensorParallelHead,
+    SpeculativeHead,
 )
 
 
@@ -646,7 +646,7 @@ class GPTNeoxForCausalLM(GPTNeoXPreTrainedModel):
     def __init__(self, config, weights):
         super().__init__(config)
         self.gpt_neox = GPTNeoXModel(config, weights)
-        self.embed_out = TensorParallelHead.load(
+        self.embed_out = SpeculativeHead.load(
             config, prefix="embed_out", weights=weights
         )
 
@@ -721,7 +721,7 @@ class GPTNeoxForCausalLM(GPTNeoXPreTrainedModel):
         )
 
         hidden_states = outputs[0]
-        lm_logits = self.embed_out(hidden_states)
+        lm_logits, speculative_logits = self.embed_out(hidden_states)
 
         lm_loss = None
         if labels is not None:
@@ -739,12 +739,15 @@ class GPTNeoxForCausalLM(GPTNeoXPreTrainedModel):
             output = (lm_logits,) + outputs[1:]
             return ((lm_loss,) + output) if lm_loss is not None else output
 
-        return CausalLMOutputWithPast(
-            loss=lm_loss,
-            logits=lm_logits,
-            past_key_values=outputs.past_key_values,
-            hidden_states=outputs.hidden_states,
-            attentions=outputs.attentions,
+        return (
+            CausalLMOutputWithPast(
+                loss=lm_loss,
+                logits=lm_logits,
+                past_key_values=outputs.past_key_values,
+                hidden_states=outputs.hidden_states,
+                attentions=outputs.attentions,
+            ),
+            speculative_logits,
         )
 
     def prepare_inputs_for_generation(
diff --git a/server/text_generation_server/models/custom_modeling/opt_modeling.py b/server/text_generation_server/models/custom_modeling/opt_modeling.py
index ce3f5e21..7a5cf917 100644
--- a/server/text_generation_server/models/custom_modeling/opt_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/opt_modeling.py
@@ -32,7 +32,7 @@ from text_generation_server.utils.layers import (
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
     TensorParallelRowLinear,
-    TensorParallelHead,
+    SpeculativeHead,
 )
 
 EPS = 1e-5
@@ -748,7 +748,7 @@ class OPTForCausalLM(OPTPreTrainedModel):
 
         self.model = OPTModel(config, weights)
 
-        self.lm_head = TensorParallelHead.load(
+        self.lm_head = SpeculativeHead.load(
             config, prefix="model.decoder.embed_tokens", weights=weights
         )
 
@@ -792,16 +792,19 @@ class OPTForCausalLM(OPTPreTrainedModel):
             return_dict=return_dict,
         )
 
-        logits = self.lm_head(outputs[0]).contiguous()
+        logits, speculative_logits = self.lm_head(outputs)
 
         loss = None
 
-        return CausalLMOutputWithPast(
-            loss=loss,
-            logits=logits,
-            past_key_values=outputs.past_key_values,
-            hidden_states=outputs.hidden_states,
-            attentions=outputs.attentions,
+        return (
+            CausalLMOutputWithPast(
+                loss=loss,
+                logits=logits,
+                past_key_values=outputs.past_key_values,
+                hidden_states=outputs.hidden_states,
+                attentions=outputs.attentions,
+            ),
+            speculative_logits,
         )
 
     def prepare_inputs_for_generation(
diff --git a/server/text_generation_server/models/custom_modeling/phi_modeling.py b/server/text_generation_server/models/custom_modeling/phi_modeling.py
new file mode 100644
index 00000000..1571f9fd
--- /dev/null
+++ b/server/text_generation_server/models/custom_modeling/phi_modeling.py
@@ -0,0 +1,330 @@
+# Implementation of the PhiModel and PhiForCausalLM classes
+
+import torch
+import torch.distributed
+
+import math
+from torch import nn
+from typing import Optional, List, Tuple, Any
+from transformers.configuration_utils import PretrainedConfig
+from transformers.modeling_outputs import CausalLMOutputWithPast
+
+from text_generation_server.utils.layers import (
+    TensorParallelRowLinear,
+    TensorParallelColumnLinear,
+    TensorParallelEmbedding,
+    SpeculativeHead,
+    FastLinear,
+)
+
+
+# PhiConfig stores the Phi hyperparameters (sizes, rotary dim, special token ids) as attributes.
+class PhiConfig(PretrainedConfig):
+    def __init__(
+        self,
+        vocab_size=51200,
+        n_positions=2048,
+        n_embd=2560,
+        n_layer=32,
+        n_inner=None,
+        n_head=32,
+        rotary_dim=32,
+        layer_norm_epsilon=1e-5,
+        tie_word_embeddings=False,
+        pad_vocab_size_multiple=64,
+        pad_token_id=0,
+        bos_token_id=1,
+        eos_token_id=2,
+        no_bias=False,
+        **kwargs,
+    ):
+        self.vocab_size = vocab_size
+        self.n_positions = n_positions
+        self.n_embd = n_embd
+        self.n_layer = n_layer
+        self.n_inner = n_inner
+        self.n_head = n_head
+        self.rotary_dim = rotary_dim
+        # Normalisation, embedding-tying, vocabulary-padding and special-token settings.
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.tie_word_embeddings = tie_word_embeddings
+        self.pad_vocab_size_multiple = pad_vocab_size_multiple
+        self.pad_token_id = pad_token_id
+        self.bos_token_id = bos_token_id
+        self.eos_token_id = eos_token_id
+        self.no_bias = no_bias
+        # The token ids and tie_word_embeddings are also forwarded to PretrainedConfig.
+        super().__init__(
+            pad_token_id=pad_token_id,
+            bos_token_id=bos_token_id,
+            eos_token_id=eos_token_id,
+            tie_word_embeddings=tie_word_embeddings,
+            **kwargs,
+        )
+
+
+# RotaryEmbedding precomputes sin/cos tables and rotates the leading features of q and k.
+class RotaryEmbedding(nn.Module):
+    def __init__(self, dim, max_seq_len):
+        # dim: rotated features per head; max_seq_len: number of table rows.
+        super().__init__()
+        inv_freq = [1.0 / 10000.0 ** (i / dim) for i in range(0, dim, 2)]
+        inv_freq = torch.tensor(inv_freq).view(1, len(inv_freq))
+        t = torch.arange(0, max_seq_len, dtype=torch.float).view(max_seq_len, 1)
+        # Outer product: row p holds p * inv_freq.
+        freqs = t.matmul(inv_freq)
+        self.sin = freqs.sin()
+        self.cos = freqs.cos()
+
+    def apply_rotary_emb_qkv(self, qkv, seqlen_offset):
+        """Rotate q and k of a packed (batch, seq, 3, heads, head_dim) tensor.
+
+        seqlen_offset is the row of the sin/cos tables used for the first position.
+        Only the first 2 * cos.shape[1] features of each head are rotated.
+        Returns (q, k, v); raises Exception if dim 2 of qkv is not 3.
+        """
+        _b_size, seqlen, three, _, _headdim = qkv.shape
+        if three != 3:
+            raise Exception("unexpected shape for qkv")
+        rotary_dim = self.cos.shape[1] * 2
+        q_rot, q_pass = qkv[:, :, 0, :, :rotary_dim], qkv[:, :, 0, :, rotary_dim:]
+        k_rot, k_pass = qkv[:, :, 1, :, :rotary_dim], qkv[:, :, 1, :, rotary_dim:]
+        q1, q2 = torch.chunk(q_rot, 2, dim=-1)
+        k1, k2 = torch.chunk(k_rot, 2, dim=-1)
+        # sin/cos are plain tensor attributes (not registered buffers) built in
+        # float32, so align them with qkv to avoid mixing dtypes or devices.
+        c = self.cos.narrow(0, seqlen_offset, seqlen).unsqueeze(1)
+        s = self.sin.narrow(0, seqlen_offset, seqlen).unsqueeze(1)
+        c = c.to(device=qkv.device, dtype=qkv.dtype)
+        s = s.to(device=qkv.device, dtype=qkv.dtype)
+        q_rot = torch.cat(
+            [q1 * c - q2 * s, q1 * s + q2 * c],
+            dim=-1,
+        )
+        k_rot = torch.cat(
+            [k1 * c - k2 * s, k1 * s + k2 * c],
+            dim=-1,
+        )
+        q = torch.cat([q_rot, q_pass], dim=-1)
+        k = torch.cat([k_rot, k_pass], dim=-1)
+        v = qkv[:, :, 2]
+        return q, k, v
+
+
+# PhiCausalLMHead normalises the hidden states and projects them with a SpeculativeHead.
+class PhiCausalLMHead(nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+        self.ln = nn.LayerNorm.load(
+            prefix="lm_head.ln",
+            weights=weights,
+            eps=config.layer_norm_epsilon,
+        )
+        self.linear = SpeculativeHead.load(
+            config=config, prefix="lm_head.linear", weights=weights
+        )
+
+    def forward(self, hidden_states):
+        # Layer norm first, then the head; the head's output is returned unchanged.
+        normed = self.ln(hidden_states)
+        return self.linear(normed)
+
+
+# PhiMHA is a multi-head attention layer with rotary embeddings and a kv cache. A causal mask prevents tokens from attending to subsequent tokens.
+class PhiMHA(nn.Module):
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        self.Wqkv = TensorParallelColumnLinear.load(
+            config, prefix=f"{prefix}.Wqkv", weights=weights, bias=not config.no_bias
+        )
+        self.out_proj = TensorParallelRowLinear.load(
+            config,
+            prefix=f"{prefix}.out_proj",
+            weights=weights,
+            bias=not config.no_bias,
+        )
+        self.op_size = config.n_embd
+        self.head_dim = int(config.n_embd / config.n_head)
+        self.num_heads = config.n_head
+        self.rotary_emb = RotaryEmbedding(config.rotary_dim, config.n_positions)
+        self.softmax_scale = 1.0 / math.sqrt(self.head_dim)
+
+    def forward(
+        self,
+        hidden_states,
+        past_kv_cache,
+        attention_mask=None,
+    ):
+        """Self-attention over hidden_states with an optional cached (k, v) pair.
+
+        attention_mask only toggles causal masking; its values are never read.
+        Returns (projected attention output, [k, v] including cached positions).
+        """
+        b_size, seq_len, _n_embd = hidden_states.shape
+        qkv = self.Wqkv(hidden_states)
+        qkv = qkv.view(b_size, seq_len, 3, self.num_heads, self.head_dim)
+        # Rotary positions for the new tokens continue after the cached keys.
+        seqlen_offset = 0 if past_kv_cache is None else past_kv_cache[0].shape[1]
+        q, k, v = self.rotary_emb.apply_rotary_emb_qkv(qkv, seqlen_offset)
+        # if there is a kv_cache, then we need to concatenate
+        if past_kv_cache is not None:
+            prev_k, prev_v = past_kv_cache
+            k = torch.cat([prev_k, k], dim=1)
+            v = torch.cat([prev_v, v], dim=1)
+        past_kv_cache = [k, v]
+        attn_weights = torch.einsum("bthd,bshd->bhts", q, k * self.softmax_scale)
+        if attention_mask is not None:
+            seqlen_q, seqlen_k = q.shape[1], k.shape[1]
+            # Query i sits at absolute position (seqlen_k - seqlen_q) + i, so shift the
+            # diagonal by the cached length; with no cache this is the plain diagonal 1.
+            causal_mask = torch.triu(
+                torch.full((seqlen_q, seqlen_k), -10000.0, device=attn_weights.device),
+                1 + seqlen_k - seqlen_q,
+            )
+            attn_weights = attn_weights + causal_mask.to(dtype=attn_weights.dtype)
+        attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)
+        attn_output = attn_weights.matmul(v.transpose(1, 2)).squeeze(0)
+        attn_output = (
+            attn_output.view((b_size, self.num_heads, seq_len, self.head_dim))
+            .transpose(1, 2)
+            .flatten(-2)
+        )
+        return self.out_proj(attn_output), past_kv_cache
+
+
+# PhiMLP is a two-layer feed-forward network: fc1, gelu, then fc2.
+class PhiMLP(nn.Module):
+    def __init__(self, prefix, config, weights):
+        super().__init__()
+        # Stored but not read anywhere in this class.
+        self.n_inner = config.n_inner
+        self.activation = torch.nn.functional.gelu
+
+        # NOTE(review): bias is hard-coded to False here, whereas PhiMHA passes
+        # `bias=not config.no_bias` -- confirm the checkpoints have no MLP bias.
+        self.fc1 = FastLinear.load(
+            config=config, prefix=f"{prefix}.fc1", weights=weights, bias=False
+        )
+        self.fc2 = FastLinear.load(
+            config=config, prefix=f"{prefix}.fc2", weights=weights, bias=False
+        )
+
+    def forward(self, hidden_states):
+        """Apply fc1, the gelu activation and fc2 in sequence.
+
+        Returns the fc2 output for the given hidden_states.
+        """
+        projected = self.fc1(hidden_states)
+        activated = self.activation(projected)
+        return self.fc2(activated)
+
+
+# PhiBlock is a single transformer block: one layer norm feeding attention and MLP in parallel.
+class PhiBlock(nn.Module):
+    def __init__(self, layer_id, config, weights):
+        super().__init__()
+        # layer_id doubles as the weight-name prefix for this block's parameters.
+        self.layer_id = layer_id
+        # NOTE(review): nn.LayerNorm.load is not stock torch; presumably attached
+        # elsewhere in the project -- confirm it is in place before this runs.
+        self.layer_norm = nn.LayerNorm.load(
+            prefix=f"{layer_id}.ln", weights=weights, eps=config.layer_norm_epsilon
+        )
+        self.mixer = PhiMHA(prefix=f"{layer_id}.mixer", config=config, weights=weights)
+        self.mlp = PhiMLP(prefix=f"{layer_id}.mlp", config=config, weights=weights)
+
+    def forward(self, hidden_states, kv_cache, attention_mask):
+        """Run the block on hidden_states.
+
+        Both the attention mixer and the MLP read the same normalised input; their
+        outputs are summed with the un-normalised input as the residual.
+        Returns (block output, kv cache returned by the mixer).
+        """
+        normed = self.layer_norm(hidden_states)
+        attn_out, new_kv_cache = self.mixer(normed, kv_cache, attention_mask)
+        mlp_out = self.mlp(normed)
+        return attn_out + mlp_out + hidden_states, new_kv_cache
+
+
+# PhiModel implements the embedding layer and the transformer blocks.
+class PhiModel(nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+        self.tp_rank = weights.process_group.rank()
+        self.tp_world_size = weights.process_group.size()
+        self.embed_tokens = TensorParallelEmbedding(
+            prefix="transformer.embd.wte", weights=weights
+        )
+        self.blocks = nn.ModuleList(
+            [
+                PhiBlock(f"transformer.h.{layer_id}", config, weights)
+                for layer_id in range(config.n_layer)
+            ]
+        )
+
+    def forward(
+        self,
+        input_ids: torch.LongTensor,
+        past_key_values: Optional[List[Tuple[torch.FloatTensor]]] = None,
+        attention_mask: Optional[torch.ByteTensor] = None,
+        return_dict: Optional[bool] = None,
+        use_cache: Optional[bool] = None,
+    ) -> Tuple[torch.Tensor, List[Tuple[torch.Tensor, torch.Tensor]]]:
+        """Embed input_ids and run every block, threading one kv cache per block.
+
+        return_dict and use_cache are accepted but unused. Returns (hidden_states,
+        new list of per-block caches); the past_key_values argument is not mutated.
+        """
+        hidden_states = self.embed_tokens(input_ids)
+        # A single-token step passes no mask, so the blocks skip causal masking.
+        mask = attention_mask if hidden_states.shape[1] > 1 else None
+        if past_key_values is None:
+            caches = [None] * len(self.blocks)
+        else:
+            caches = list(past_key_values)
+        for index, block in enumerate(self.blocks):
+            hidden_states, caches[index] = block(hidden_states, caches[index], mask)
+        return hidden_states, caches
+
+
+# PhiForCausalLM wraps the PhiModel and PhiCausalLMHead together and returns a CausalLMOutputWithPast object.
+class PhiForCausalLM(torch.nn.Module):
+    def __init__(self, config, weights):
+        super().__init__()
+        self.model = PhiModel(config, weights)
+        self.lm_head = PhiCausalLMHead(config, weights)
+
+    def forward(
+        self,
+        input_ids: torch.LongTensor,
+        past_key_values: Optional[List[Tuple[torch.FloatTensor]]] = None,
+        attention_mask: Optional[torch.ByteTensor] = None,
+        return_dict: Optional[bool] = None,
+        use_cache: Optional[bool] = None,
+        labels: Optional[torch.LongTensor] = None,
+    ) -> Tuple[torch.Tensor, List[Tuple[torch.Tensor, torch.Tensor]]]:
+        """Run model and head; return a CausalLMOutputWithPast when return_dict is
+        truthy, else the tuple ([loss,] logits, past_key_values)."""
+        model_output = self.model(
+            input_ids, past_key_values, attention_mask, return_dict, use_cache
+        )
+        # The head ends in a SpeculativeHead, which yields (logits, speculative_logits)
+        # as at the other call sites; unpack so `logits` is a tensor, not a tuple.
+        # NOTE(review): speculative logits are dropped to keep the return type.
+        logits, _speculative_logits = self.lm_head(model_output[0])
+        loss = None
+        if labels is not None:
+            # Logits at t score label t + 1; reshape handles the non-contiguous slices.
+            loss = nn.CrossEntropyLoss()(
+                logits[:, :-1].reshape(-1, logits.size(-1)), labels[:, 1:].reshape(-1)
+            )
+        if not return_dict:
+            outputs = (logits,) + model_output[1:]
+            return ((loss,) + outputs) if loss is not None else outputs
+        return CausalLMOutputWithPast(
+            loss=loss,
+            logits=logits,
+            past_key_values=model_output[1],
+            hidden_states=None,
+            attentions=None,
+        )
diff --git a/server/text_generation_server/models/custom_modeling/t5_modeling.py b/server/text_generation_server/models/custom_modeling/t5_modeling.py
index d3e4f53a..2773fb15 100644
--- a/server/text_generation_server/models/custom_modeling/t5_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/t5_modeling.py
@@ -42,7 +42,7 @@ from text_generation_server.utils.layers import (
     TensorParallelColumnLinear,
     TensorParallelEmbedding,
     TensorParallelRowLinear,
-    TensorParallelHead,
+    SpeculativeHead,
 )
 
 
@@ -1033,14 +1033,14 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
         )
 
         try:
-            self.lm_head = TensorParallelHead.load(
+            self.lm_head = SpeculativeHead.load(
                 config, prefix="lm_head", weights=weights
             )
         except RuntimeError:
             # Some models like t5-small were saved with shared weights unlike flan
             # Since they are declared as the same arch we have no choice but hope
             # that this is OK instead of using a proper flag.
-            self.lm_head = TensorParallelHead.load(
+            self.lm_head = SpeculativeHead.load(
                 config, prefix="shared", weights=weights
             )
 
@@ -1126,7 +1126,7 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
             # See https://github.com/tensorflow/mesh/blob/fa19d69eafc9a482aff0b59ddd96b025c0cb207d/mesh_tensorflow/transformer/transformer.py#L586
             sequence_output = sequence_output * (self.model_dim**-0.5)
 
-        lm_logits = self.lm_head(sequence_output)
+        logits, speculative_logits = self.lm_head(sequence_output)
 
         loss = None
         if labels is not None:
@@ -1140,16 +1140,19 @@ class T5ForConditionalGeneration(T5PreTrainedModel):
             output = (lm_logits,) + decoder_outputs[1:] + encoder_outputs
             return ((loss,) + output) if loss is not None else output
 
-        return Seq2SeqLMOutput(
-            loss=loss,
-            logits=lm_logits,
-            past_key_values=decoder_outputs.past_key_values,
-            decoder_hidden_states=decoder_outputs.hidden_states,
-            decoder_attentions=decoder_outputs.attentions,
-            cross_attentions=decoder_outputs.cross_attentions,
-            encoder_last_hidden_state=encoder_outputs.last_hidden_state,
-            encoder_hidden_states=encoder_outputs.hidden_states,
-            encoder_attentions=encoder_outputs.attentions,
+        return (
+            Seq2SeqLMOutput(
+                loss=loss,
+                logits=logits,
+                past_key_values=decoder_outputs.past_key_values,
+                decoder_hidden_states=decoder_outputs.hidden_states,
+                decoder_attentions=decoder_outputs.attentions,
+                cross_attentions=decoder_outputs.cross_attentions,
+                encoder_last_hidden_state=encoder_outputs.last_hidden_state,
+                encoder_hidden_states=encoder_outputs.hidden_states,
+                encoder_attentions=encoder_outputs.attentions,
+            ),
+            speculative_logits,
         )
 
     def prepare_inputs_for_generation(
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index f1a4854f..2c440083 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -1,23 +1,26 @@
 import math
+import os
+import time
 import itertools
-from text_generation_server.utils.tokens import batch_top_tokens
 import torch
 import torch.distributed
 
 import numpy as np
 
+from loguru import logger
 from dataclasses import dataclass
 from opentelemetry import trace
 from transformers import PreTrainedTokenizerBase
-from typing import Optional, Tuple, List, Type, Union, Dict
+from typing import Optional, Tuple, List, Type, Dict
 
 from text_generation_server.models import Model
+from text_generation_server.utils.tokens import batch_top_tokens
+from text_generation_server.utils.speculate import get_speculate
 from text_generation_server.models.types import (
     Batch,
-    PrefillTokens,
+    Tokens,
     Generation,
     GeneratedText,
-    TopTokens,
 )
 from text_generation_server.models.cache_manager import (
     get_cache_manager,
@@ -25,6 +28,7 @@ from text_generation_server.models.cache_manager import (
     BLOCK_SIZE,
 )
 from text_generation_server.pb import generate_pb2
+from text_generation_server.models.globals import MEM_POOL, CUDA_GRAPHS
 from text_generation_server.utils import StoppingCriteria, HeterogeneousNextTokenChooser
 from text_generation_server.utils.dist import MEMORY_FRACTION
 
@@ -41,6 +45,7 @@ class FlashCausalLMBatch(Batch):
     # Decoder values
     input_ids: torch.Tensor
     position_ids: torch.Tensor
+    speculative_ids: torch.Tensor
 
     # Flash Attention values
 
@@ -60,7 +65,7 @@ class FlashCausalLMBatch(Batch):
     # Set in prefill by the CacheManager
     # list of length b of list of length s_i // block_size
     block_tables: Optional[List[List[int]]]
-    # tensor of size [b, max_seqlen // block_size] holding the paged attention block tables for all sequences
+    # tensor of size [b, max_total_seqlen // block_size] holding the paged attention block tables for all sequences
     block_tables_tensor: Optional[torch.Tensor]
     # tensor of length \sum_{i=0}^{b} max_s_i  holding the paged attention slots for all sequences
     slots: Optional[torch.Tensor]
@@ -101,6 +106,19 @@ class FlashCausalLMBatch(Batch):
             max_tokens=self.blocks * BLOCK_SIZE,
         )
 
+    @classmethod
+    def batch_tokenized_inputs(cls, requests, tokenizer):
+        """Tokenize every request's `inputs` in one tokenizer call.
+
+        Truncation uses the largest `truncate` among the requests (0 if there are
+        none) and the tokenizer's "input_ids" entry is returned.
+        """
+        texts, longest = [], 0
+        for request in requests:
+            texts.append(request.inputs)
+            longest = max(longest, request.truncate)
+        return tokenizer(texts, truncation=True, max_length=longest)["input_ids"]
+
     @classmethod
     def from_pb(
         cls,
@@ -109,17 +127,9 @@ class FlashCausalLMBatch(Batch):
         dtype: torch.dtype,
         device: torch.device,
     ) -> "FlashCausalLMBatch":
-        batch_inputs = []
-        max_truncation = 0
-        for r in pb.requests:
-            batch_inputs.append(r.inputs)
-            max_truncation = max(max_truncation, r.truncate)
-
-        batch_tokenized_inputs = tokenizer(
-            batch_inputs, truncation=True, max_length=max_truncation
-        )["input_ids"]
-
+        batch_tokenized_inputs = cls.batch_tokenized_inputs(pb.requests, tokenizer)
         position_ids = []
+        speculative_ids = []
         cu_seqlen_prefill = [0]
         needed_blocks_slots = []
         start_slots = []
@@ -159,6 +169,11 @@ class FlashCausalLMBatch(Batch):
             requests_idx_mapping[r.id] = i
 
             tokenized_input = tokenized_input[-r.truncate :]
+            if (
+                tokenized_input[0] == tokenizer.bos_token_id
+                and tokenized_input[1] == tokenizer.bos_token_id
+            ):
+                tokenized_input = tokenized_input[1:]
 
             input_length = len(tokenized_input)
             input_lengths.append(input_length)
@@ -186,7 +201,8 @@ class FlashCausalLMBatch(Batch):
 
             # Paged attention
             # Remove one as the first token des not have a past
-            total_tokens = input_length + max_new_tokens - 1
+            speculative_length = get_speculate()
+            total_tokens = input_length + max_new_tokens - 1 + speculative_length
             needed_blocks = math.ceil(total_tokens / BLOCK_SIZE)
             blocks += needed_blocks
             needed_blocks_slots.append((needed_blocks, total_tokens))
@@ -224,10 +240,12 @@ class FlashCausalLMBatch(Batch):
             cumulative_max_length += total_tokens
             max_seqlen = max(max_seqlen, input_length)
             max_blocks = max(max_blocks, needed_blocks)
-            max_length = max(max_length, input_length + max_new_tokens)
+            max_length = max(
+                max_length, input_length + max_new_tokens + speculative_length
+            )
 
         next_token_chooser = HeterogeneousNextTokenChooser.from_pb(
-            next_token_chooser_parameters, dtype, device
+            next_token_chooser_parameters, dtype, device, tokenizer
         )
         start_slots = torch.tensor(start_slots, dtype=torch.int64)
 
@@ -255,7 +273,6 @@ class FlashCausalLMBatch(Batch):
         cu_seqlen_prefill = torch.tensor(
             cu_seqlen_prefill, device=device, dtype=torch.int32
         )
-
         position_ids = position_ids.to(device)
         slot_indices = slot_indices.to(device)
         input_ids = torch.tensor(input_ids, dtype=torch.int64, device=device)
@@ -309,6 +326,7 @@ class FlashCausalLMBatch(Batch):
             top_n_tokens_tensor=top_n_tokens_tensor,
             blocks=blocks,
             max_blocks=max_blocks,
+            speculative_ids=None,
         )
 
     @tracer.start_as_current_span("filter")
@@ -419,6 +437,9 @@ class FlashCausalLMBatch(Batch):
         slots = self.slots[slot_filtering_indices]
         next_token_chooser = self.next_token_chooser.filter(indices)
         top_n_tokens_tensor = self.top_n_tokens_tensor[indices]
+        speculative_ids = (
+            self.speculative_ids[indices] if self.speculative_ids is not None else None
+        )
 
         start_slots = torch.tensor(start_slots, dtype=torch.int64)
 
@@ -454,6 +475,7 @@ class FlashCausalLMBatch(Batch):
             top_n_tokens_tensor=top_n_tokens_tensor,
             blocks=blocks,
             max_blocks=max_blocks,
+            speculative_ids=speculative_ids,
         )
 
     @classmethod
@@ -473,6 +495,9 @@ class FlashCausalLMBatch(Batch):
             total_batch_size += len(b)
             total_slots += len(b.slots)
             blocks += b.blocks
+            speculative_length = (
+                b.speculative_ids.shape[1] if b.speculative_ids is not None else 0
+            )
             max_blocks = max(max_blocks, b.max_blocks)
             max_seqlen = max(max_seqlen, b.max_seqlen)
             max_length = max(
@@ -480,6 +505,7 @@ class FlashCausalLMBatch(Batch):
                 max(
                     input_length
                     + stopping_criteria.max_new_tokens
+                    + speculative_length
                     - stopping_criteria.current_tokens
                     for input_length, stopping_criteria in zip(
                         b.input_lengths, b.stopping_criterias
@@ -513,6 +539,7 @@ class FlashCausalLMBatch(Batch):
         read_offsets = []
 
         next_token_chooser_parameters = []
+        fsm_grammar_states = []
         stopping_criterias = []
         top_n_tokens = []
 
@@ -561,6 +588,7 @@ class FlashCausalLMBatch(Batch):
             read_offsets.extend(batch.read_offsets)
 
             next_token_chooser_parameters.extend([r.parameters for r in batch.requests])
+            fsm_grammar_states.extend(batch.next_token_chooser.fsm_grammar_states)
             stopping_criterias.extend(batch.stopping_criterias)
 
             top_n_tokens.extend(batch.top_n_tokens)
@@ -575,6 +603,14 @@ class FlashCausalLMBatch(Batch):
             next_token_chooser_parameters,
             dtype=batches[0].next_token_chooser.dtype,
             device=batches[0].next_token_chooser.device,
+            tokenizer=batches[0].next_token_chooser.tokenizer,
+            fsm_grammar_states=fsm_grammar_states,
+        )
+
+        speculative_ids = (
+            torch.cat([b.speculative_ids for b in batches], dim=0)
+            if batches[0].speculative_ids is not None
+            else None
         )
 
         # Needed to avoid dropping blocks when the batches will go out of scope
@@ -611,6 +647,7 @@ class FlashCausalLMBatch(Batch):
             top_n_tokens_tensor=top_n_tokens_tensor,
             blocks=blocks,
             max_blocks=max_blocks,
+            speculative_ids=speculative_ids,
         )
 
     def __del__(self):
@@ -642,6 +679,8 @@ class FlashCausalLM(Model):
         self.num_kv_heads = num_kv_heads
         self.head_size = head_size
 
+        self.cuda_graphs = {}
+
         super(FlashCausalLM, self).__init__(
             model=model,
             tokenizer=tokenizer,
@@ -657,7 +696,62 @@ class FlashCausalLM(Model):
     def batch_type(self) -> Type[FlashCausalLMBatch]:
         return FlashCausalLMBatch
 
+    def cuda_graph_warmup(self, bs: int, max_s: int, max_bt: int):
+        """Warm up and capture a CUDA graph of self.model.forward for batch size bs."""
+        input_ids = torch.zeros(bs, dtype=torch.int64, device=self.device)
+        position_ids = torch.zeros(bs, dtype=torch.int32, device=self.device)
+        slots = torch.arange(bs, dtype=torch.int64, device=self.device)
+        input_lengths = torch.ones(bs, dtype=torch.int32, device=self.device) * max_s
+        block_tables = (
+            torch.arange(max_bt, dtype=torch.int32, device=self.device)
+            .repeat(bs)
+            .reshape((bs, max_bt))
+        )
+        kv_cache = get_cache_manager().kv_cache
+        # Register the placeholder inputs (and, below, the graph and its outputs) under bs.
+        self.cuda_graphs[bs] = {
+            "input_ids": input_ids,
+            "position_ids": position_ids,
+            "kv_cache": kv_cache,
+            "block_tables": block_tables,
+            "slots": slots,
+            "input_lengths": input_lengths,
+        }
+        graph = torch.cuda.CUDAGraph()
+        self.cuda_graphs[bs]["graph"] = graph
+        torch.cuda.synchronize()
+        # Run once outside to warmup
+        self.model.forward(
+            input_ids=input_ids,
+            position_ids=position_ids,
+            cu_seqlen_prefill=None,
+            kv_cache=kv_cache,
+            block_tables=block_tables,
+            slots=slots,
+            input_lengths=input_lengths,
+            max_s=max_s,
+            lm_head_indices=None,
+        )
+        torch.cuda.synchronize()
+        # Capture the same forward call into `graph`, using MEM_POOL as the memory pool.
+        with torch.cuda.graph(graph, pool=MEM_POOL):
+            logits, speculative_logits = self.model.forward(
+                input_ids=input_ids,
+                position_ids=position_ids,
+                cu_seqlen_prefill=None,
+                kv_cache=kv_cache,
+                block_tables=block_tables,
+                slots=slots,
+                input_lengths=input_lengths,
+                max_s=max_s,
+                lm_head_indices=None,
+            )
+            self.cuda_graphs[bs]["logits"] = logits
+            self.cuda_graphs[bs]["speculative_logits"] = speculative_logits
+        torch.cuda.synchronize()
+
     def warmup(self, batch: FlashCausalLMBatch):
+        # The warmup batch is the biggest batch we could ever receive
         torch.cuda.empty_cache()
         try:
             cache_manager = set_cache_manager(
@@ -669,7 +763,9 @@ class FlashCausalLM(Model):
                 self.dtype,
                 self.device,
             )
-            _, batch = self.generate_token(batch)
+            max_bt = batch.max_blocks
+            max_s = max_bt * get_cache_manager().block_size
+            _, batch, _ = self.generate_token(batch)
         except torch.cuda.OutOfMemoryError as e:
             raise RuntimeError(
                 f"Not enough memory to handle {len(batch.input_ids)} prefill tokens. "
@@ -692,7 +788,8 @@ class FlashCausalLM(Model):
         )
 
         num_blocks = (
-            int(free_memory // total_cache_size)
+            # Leave 5% for some wiggle room
+            int((free_memory * 0.95) // total_cache_size)
             # Add batch.blocks as we allocated it above, so it is included in the peak memory.
             + cache_manager.num_blocks
         )
@@ -710,26 +807,121 @@ class FlashCausalLM(Model):
             self.device,
         )
 
+        if CUDA_GRAPHS:
+            try:
+                logger.info(f"Cuda Graphs are enabled for sizes {CUDA_GRAPHS}")
+                # Warmup cuda graphs
+                for bs in CUDA_GRAPHS:
+                    if self.speculate is None or self.speculate + 1 <= bs:
+                        self.cuda_graph_warmup(bs, max_s, max_bt)
+            except torch.cuda.OutOfMemoryError:
+                logger.exception(f"Decode cuda graph warmup failed")
+
         return int(num_blocks * BLOCK_SIZE)
 
-    def forward(self, batch: FlashCausalLMBatch) -> Tuple[torch.Tensor, torch.Tensor]:
+    def forward(
+        self, batch: FlashCausalLMBatch
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:  # (logits, speculative_logits)
         # Model Forward
-        return self.model.forward(
-            input_ids=batch.input_ids,
-            position_ids=batch.position_ids,
-            cu_seqlen_prefill=batch.cu_seqlen_prefill,
-            kv_cache=get_cache_manager().kv_cache,
-            block_tables=batch.block_tables_tensor,
-            slots=batch.slots[batch.slot_indices],
-            input_lengths=batch.input_lengths_tensor,
-            max_s=batch.max_seqlen,
-            lm_head_indices=batch.prefill_head_indices,
+        if batch.speculative_ids is not None:
+            input_ids = batch.input_ids
+            position_ids = batch.position_ids
+            cu_seqlen_prefill = batch.cu_seqlen_prefill
+            kv_cache = get_cache_manager().kv_cache
+            block_tables = batch.block_tables_tensor
+            slots = batch.slots[batch.slot_indices]
+            input_lengths = batch.input_lengths_tensor
+            max_s = batch.max_seqlen
+            lm_head_indices = batch.prefill_head_indices
+
+            speculative_ids = batch.speculative_ids
+            # Each sequence is expanded to 1 + speculative_length consecutive entries
+            B, speculative_length = speculative_ids.shape
+            new_length = speculative_length + 1
+            new_input_ids = torch.cat(
+                [input_ids.unsqueeze(-1), speculative_ids], dim=1
+            ).reshape(-1)
+            arange = torch.arange(new_length, device=position_ids.device).unsqueeze(0)
+            arange_int = arange.to(dtype=torch.int32)
+            new_position_ids = (
+                position_ids.unsqueeze(-1).expand(B, new_length) + arange
+            ).view(-1)
+            slots = (slots.unsqueeze(-1).expand(B, new_length) + arange_int).view(-1)
+            input_lengths = (
+                input_lengths.unsqueeze(-1).expand(B, new_length) + arange_int
+            ).view(-1)
+
+            # Copy each sequence's block table for all of its expanded entries
+            block_tables = (
+                block_tables.unsqueeze(1)
+                .expand(B, new_length, -1)
+                .reshape(B * new_length, -1)
+                .contiguous()
+            )
+            max_s = max_s + speculative_length
+
+            input_ids = new_input_ids
+            position_ids = new_position_ids
+        else:
+            input_ids = batch.input_ids
+            position_ids = batch.position_ids
+            cu_seqlen_prefill = batch.cu_seqlen_prefill
+            kv_cache = get_cache_manager().kv_cache
+            block_tables = batch.block_tables_tensor
+            slots = batch.slots[batch.slot_indices]
+            input_lengths = batch.input_lengths_tensor
+            max_s = batch.max_seqlen
+            lm_head_indices = batch.prefill_head_indices
+        # Use the smallest captured cuda graph whose batch size is >= bs
+        bs = input_ids.shape[0]
+        sorted_padded_bs = sorted([k for k in self.cuda_graphs.keys() if k >= bs])
+        if sorted_padded_bs:
+            # Get associated cuda graph
+            cuda_graph = self.cuda_graphs[sorted_padded_bs[0]]
+        else:
+            cuda_graph = None
+        # Prefill batches, or batches too large for every graph, take the eager path
+        if cu_seqlen_prefill is not None or cuda_graph is None:
+            return self.model.forward(
+                input_ids=input_ids,
+                position_ids=position_ids,
+                cu_seqlen_prefill=cu_seqlen_prefill,
+                kv_cache=kv_cache,
+                block_tables=block_tables,
+                slots=slots,
+                input_lengths=input_lengths,
+                max_s=max_s,
+                lm_head_indices=lm_head_indices,
+            )
+
+        # Copy inputs to the static inputs of the cuda graph
+        # Static inputs are potentially padded
+        cuda_graph["input_ids"][: input_ids.shape[0]] = input_ids
+        cuda_graph["position_ids"][: position_ids.shape[0]] = position_ids
+        cuda_graph["block_tables"][
+            : block_tables.shape[0], : block_tables.shape[1]
+        ] = block_tables
+        cuda_graph["slots"].fill_(-1)  # padded entries keep slot -1
+        cuda_graph["slots"][: slots.shape[0]] = slots
+        cuda_graph["input_lengths"].zero_()  # padded entries keep length 0
+        cuda_graph["input_lengths"][: input_lengths.shape[0]] = input_lengths
+
+        # Replay the graph
+        cuda_graph["graph"].replay()
+        # Slice output to the correct shape
+        speculative_logits = (
+            cuda_graph["speculative_logits"][:bs]
+            if cuda_graph["speculative_logits"] is not None
+            else None
         )
+        logits = cuda_graph["logits"][:bs]
+        return logits, speculative_logits
 
     @tracer.start_as_current_span("generate_token")
     def generate_token(
         self, batch: FlashCausalLMBatch
-    ) -> Tuple[List[Generation], Optional[FlashCausalLMBatch]]:
+    ) -> Tuple[List[Generation], Optional[FlashCausalLMBatch], Tuple[int, int]]:
+        start = time.time_ns()
         prefill = batch.cu_seqlen_prefill is not None
         prefill_logprobs = batch.prefill_next_token_indices is not None
 
@@ -747,7 +939,7 @@ class FlashCausalLM(Model):
             batch.slots = slots
 
         try:
-            out = self.forward(batch)
+            out, speculative_logits = self.forward(batch)
         except Exception as e:
             del batch
             raise e
@@ -756,15 +948,32 @@ class FlashCausalLM(Model):
             next_token_logits = (
                 out[batch.prefill_next_token_indices] if prefill_logprobs else out
             )
+            if speculative_logits is not None:
+                speculative_logits = (
+                    speculative_logits[batch.prefill_next_token_indices]
+                    if prefill_logprobs
+                    else speculative_logits
+                )
         else:
             next_token_logits = out
 
-        next_input_ids, next_token_logprobs, logprobs = batch.next_token_chooser(
-            batch.all_input_ids_tensor[:, : batch.max_seqlen], next_token_logits
+        speculate = get_speculate()
+        (
+            next_input_ids,
+            next_token_logprobs,
+            logprobs,
+            accepted_ids,
+            speculative_ids,
+        ) = batch.next_token_chooser(
+            batch.all_input_ids_tensor[:, : batch.max_seqlen],
+            next_token_logits,
+            speculate,
+            batch.speculative_ids,
+            speculative_logits,
         )
 
         batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
-            batch.top_n_tokens, batch.top_n_tokens_tensor, logprobs
+            batch.top_n_tokens, batch.top_n_tokens_tensor, logprobs, accepted_ids
         )
 
         if prefill:
@@ -789,20 +998,15 @@ class FlashCausalLM(Model):
         stopped = True
 
         # Zipped iterator
-        iterator = zip(
-            batch.input_lengths,
-            batch.all_input_ids,
-        )
+        iterator = zip(batch.input_lengths, batch.all_input_ids, accepted_ids)
 
         # We do two for loops as the first one can run completely asynchronously from the GPU while for the second
         # one, we need to first do a GPU <-> CPU sync
         # It is faster if we delay this sync for the maximum amount of time
 
         # For each member of the batch
-        for i, (
-            input_length,
-            all_input_ids,
-        ) in enumerate(iterator):
+        index = 0
+        for i, (input_length, all_input_ids, n_accepted_ids) in enumerate(iterator):
             # Indexing metadata
             start_index = cumulative_length
             end_index = cumulative_length + input_length
@@ -821,24 +1025,27 @@ class FlashCausalLM(Model):
                 # Copy batch.input_ids to prefill_token_indices
                 if prefill_logprobs:
                     if len(batch) > 1:
-                        prefill_tokens_indices[
-                            out_start_index : out_end_index - 1
-                        ] = batch.input_ids[start_index + 1 : start_index + out_length]
+                        prefill_tokens_indices[out_start_index : out_end_index - 1] = (
+                            batch.input_ids[start_index + 1 : start_index + out_length]
+                        )
                     else:
                         # Set prefill_tokens_indices to the correct slice
                         prefill_tokens_indices = batch.input_ids[
                             start_index + 1 : start_index + out_length
                         ]
 
-            batch.all_input_ids_tensor[i, input_length] = next_input_ids[i]
+            for j in range(n_accepted_ids):
+                batch.all_input_ids_tensor[i, input_length + j] = next_input_ids[index]
+                index += 1
 
             cumulative_length += input_length
 
-        # Set values in batch
-        batch.input_ids = next_input_ids
-        batch.position_ids = next_position_ids + 1
-        batch.input_lengths_tensor += 1
-        batch.slot_indices += 1
+        # Update values
+        batch.input_ids = next_input_ids[accepted_ids.cumsum(dim=-1) - 1]
+        batch.speculative_ids = speculative_ids
+        batch.position_ids = next_position_ids + accepted_ids
+        batch.input_lengths_tensor += accepted_ids
+        batch.slot_indices += accepted_ids
 
         if prefill and prefill_logprobs:
             # Get prefill logprobs
@@ -851,7 +1058,9 @@ class FlashCausalLM(Model):
 
         # GPU <-> CPU sync
         next_token_logprobs = next_token_logprobs.tolist()
-        next_token_ids = batch.input_ids.tolist()
+        next_token_ids = next_input_ids.tolist()
+        accepted_ids = accepted_ids.tolist()
+        start_decode = time.time_ns()
 
         # Zipped iterator
         iterator = zip(
@@ -864,13 +1073,13 @@ class FlashCausalLM(Model):
             batch.next_token_chooser.do_sample,
             batch.next_token_chooser.seeds,
             batch.top_n_tokens,
-            next_token_ids,
-            next_token_logprobs,
+            accepted_ids,
             batch_top_token_ids,
             batch_top_token_logprobs,
         )
 
         # For each member of the batch
+        index = 0
         for i, (
             request,
             input_length,
@@ -881,29 +1090,44 @@ class FlashCausalLM(Model):
             do_sample,
             seed,
             top_n_tokens,
-            next_token_id,
-            next_token_logprob,
+            n_accepted_ids,
             top_token_ids,
             top_token_logprobs,
         ) in enumerate(iterator):
             # Append next token to all tokens
-            all_input_ids.append(next_token_id)
+            next_token_texts = []
+            left = 0
 
-            # Generated token
-            next_token_text, prefix_offset, read_offset = self.decode_token(
-                all_input_ids,
-                prefix_offset,
-                read_offset,
-            )
+            current_stopped = False
+            for j in range(index, index + n_accepted_ids):
+                # Generated token
+                next_token_id = next_token_ids[j]
+                all_input_ids.append(next_token_id)
+                next_token_text, prefix_offset, read_offset = self.decode_token(
+                    all_input_ids,
+                    prefix_offset,
+                    read_offset,
+                )
+                next_token_texts.append(next_token_text)
 
-            # Evaluate stopping criteria
-            stop, reason = stopping_criteria(
-                next_token_id,
-                next_token_text,
-            )
+                stop, reason = stopping_criteria(
+                    next_token_id,
+                    next_token_text,
+                )
 
-            if not stop:
-                stopped = False
+                if stop:
+                    left = index + n_accepted_ids - j - 1
+                    current_stopped = True
+                    break
+                else:
+                    current_stopped = False
+            stopped = stopped and current_stopped
+
+            _next_token_ids = next_token_ids[index : index + n_accepted_ids - left]
+            _next_token_logprobs = next_token_logprobs[
+                index : index + n_accepted_ids - left
+            ]
+            index += n_accepted_ids
 
             # Shard generations
             # All generations will be appended in the rust sharded client
@@ -943,45 +1167,67 @@ class FlashCausalLM(Model):
                         clean_up_tokenization_spaces=False,
                         skip_special_tokens=False,
                     )
-                    prefill_tokens = PrefillTokens(
-                        prefill_token_ids, request_prefill_logprobs, prefill_texts
+
+                    prefill_tokens = Tokens(
+                        prefill_token_ids,
+                        request_prefill_logprobs,
+                        prefill_texts,
+                        is_special=[],
                     )
                 else:
                     prefill_tokens = None
 
                 if top_n_tokens > 0:
-                    toptoken_texts = self.tokenizer.batch_decode(
-                        top_token_ids,
-                        clean_up_tokenization_spaces=False,
-                        skip_special_tokens=False,
-                    )
-                    special_toptokens = [
-                        token_id in self.all_special_ids for token_id in top_token_ids
-                    ]
-                    top_tokens = TopTokens(
-                        top_token_ids,
-                        top_token_logprobs,
-                        toptoken_texts,
-                        special_toptokens,
-                    )
+                    all_top_tokens = []
+                    for top_token_ids, top_token_logprobs in zip(
+                        top_token_ids, top_token_logprobs
+                    ):
+                        toptoken_texts = self.tokenizer.batch_decode(
+                            top_token_ids,
+                            clean_up_tokenization_spaces=False,
+                            skip_special_tokens=False,
+                        )
+                        special_toptokens = [
+                            token_id in self.all_special_ids
+                            for token_id in top_token_ids
+                        ]
+                        top_tokens = Tokens(
+                            top_token_ids,
+                            top_token_logprobs,
+                            toptoken_texts,
+                            special_toptokens,
+                        )
+                        all_top_tokens.append(top_tokens)
+                    top_tokens = all_top_tokens
                 else:
                     top_tokens = None
 
                 generation = Generation(
                     request.id,
                     prefill_tokens,
-                    next_token_id,
-                    next_token_logprob,
-                    next_token_text,
-                    next_token_id in self.all_special_ids,
+                    Tokens(
+                        _next_token_ids,
+                        _next_token_logprobs,
+                        next_token_texts,
+                        [nid in self.all_special_ids for nid in _next_token_ids],
+                    ),
                     generated_text,
                     top_tokens,
                 )
 
                 generations.append(generation)
 
+            # accept each new token for this specific request since we may
+            # have more than one new token per request with speculative decoding
+            for next_token_id in _next_token_ids:
+                batch.next_token_chooser = (
+                    batch.next_token_chooser.advance_grammar_single(i, next_token_id)
+                )
+
             # Update values
-            batch.input_lengths[i] = input_length + 1
+            batch.input_lengths[i] = input_length + n_accepted_ids
+            if batch.input_lengths[i] > batch.max_seqlen:
+                batch.max_seqlen = batch.input_lengths[i]
             batch.prefix_offsets[i] = prefix_offset
             batch.read_offsets[i] = read_offset
             batch.all_input_ids[i] = all_input_ids
@@ -989,11 +1235,14 @@ class FlashCausalLM(Model):
         if stopped:
             del batch
             # No need to return a batch if we know that all requests stopped
-            return generations, None
+            forward_ns = start_decode - start
+            decode_ns = time.time_ns() - start_decode
+            return generations, None, (forward_ns, decode_ns)
 
         batch.prefill_cu_outlens = None
         batch.prefill_head_indices = None
         batch.prefill_next_token_indices = None
-        batch.max_seqlen = batch.max_seqlen + 1
 
-        return generations, batch
+        forward_ns = start_decode - start
+        decode_ns = time.time_ns() - start_decode
+        return generations, batch, (forward_ns, decode_ns)
diff --git a/server/text_generation_server/models/flash_cohere.py b/server/text_generation_server/models/flash_cohere.py
new file mode 100644
index 00000000..f85c7722
--- /dev/null
+++ b/server/text_generation_server/models/flash_cohere.py
@@ -0,0 +1,74 @@
+import torch
+import torch.distributed
+
+from opentelemetry import trace
+from typing import Optional
+from transformers import AutoTokenizer, AutoConfig
+
+from text_generation_server.models import FlashCausalLM
+from text_generation_server.models.custom_modeling.flash_cohere_modeling import (
+    FlashCohereForCausalLM,
+)
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+)
+
+tracer = trace.get_tracer(__name__)
+
+
+class FlashCohere(FlashCausalLM):
+    """FlashCausalLM subclass that wraps FlashCohereForCausalLM (GPU only)."""
+
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        """Load tokenizer, config and weights for `model_id` (float16 by default)."""
+        self.process_group, rank, world_size = initialize_torch_distributed()
+        if not torch.cuda.is_available():
+            raise NotImplementedError("FlashCohere is only available on GPU")
+        device = torch.device(f"cuda:{rank}")
+        dtype = dtype if dtype is not None else torch.float16
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id,
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+            use_fast=True,
+            from_slow=False,
+        )
+
+        config = AutoConfig.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+        config.quantize = quantize
+        config.use_medusa = use_medusa
+
+        torch.distributed.barrier(group=self.process_group)
+        files = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(files, device, dtype, process_group=self.process_group)
+        if config.quantize in ["gptq", "awq"]:
+            weights._set_gptq_params(model_id, revision)
+        model = FlashCohereForCausalLM(config, weights)
+        torch.distributed.barrier(group=self.process_group)
+        inner = model.model
+        super().__init__(
+            model=model,
+            tokenizer=tokenizer,
+            num_layers=len(inner.layers),
+            num_kv_heads=inner.num_key_value_heads,
+            head_size=inner.head_size,
+            dtype=dtype,
+            device=device,
+            rank=rank,
+            world_size=world_size,
+        )
diff --git a/server/text_generation_server/models/flash_dbrx.py b/server/text_generation_server/models/flash_dbrx.py
new file mode 100644
index 00000000..367d3db0
--- /dev/null
+++ b/server/text_generation_server/models/flash_dbrx.py
@@ -0,0 +1,99 @@
+import torch
+import torch.distributed
+
+from opentelemetry import trace
+from typing import Optional
+from transformers import AutoTokenizer
+from transformers.models.gpt2 import GPT2TokenizerFast
+
+from text_generation_server.models import FlashCausalLM
+from text_generation_server.models.custom_modeling.flash_dbrx_modeling import (
+    FlashDbrxForCausalLM,
+    DbrxConfig,
+)
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+)
+
+tracer = trace.get_tracer(__name__)
+
+
+class FlashDbrx(FlashCausalLM):
+    """FlashCausalLM subclass that wraps FlashDbrxForCausalLM (GPU only)."""
+
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        """Build the tokenizer, config, weights and model for `model_id`.
+
+        `dtype` defaults to bfloat16; `quantize` and `use_medusa` are stored on the
+        config unchanged.
+
+        Raises:
+            NotImplementedError: if CUDA is not available.
+        """
+        self.process_group, rank, world_size = initialize_torch_distributed()
+        if torch.cuda.is_available():
+            device = torch.device(f"cuda:{rank}")
+            dtype = torch.bfloat16 if dtype is None else dtype
+        else:
+            raise NotImplementedError("FlashDBRX is only available on GPU")
+
+        tokenizer_kwargs = dict(
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+            use_fast=True,
+            from_slow=False,
+        )
+        # Try the GPT2 fast tokenizer, then AutoTokenizer, then a fallback repo.
+        # Catch Exception rather than a bare except so that KeyboardInterrupt and
+        # SystemExit still abort the load instead of triggering a fallback.
+        try:
+            tokenizer = GPT2TokenizerFast.from_pretrained(model_id, **tokenizer_kwargs)
+        except Exception:
+            try:
+                tokenizer = AutoTokenizer.from_pretrained(model_id, **tokenizer_kwargs)
+            except Exception:
+                # FIXME: change back to model id once the tokenizer.json is merged
+                # NOTE(review): the model's `revision` is also passed for this repo.
+                tokenizer = GPT2TokenizerFast.from_pretrained(
+                    "Xenova/dbrx-instruct-tokenizer", **tokenizer_kwargs
+                )
+
+        config = DbrxConfig.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+        config.quantize = quantize
+        config.use_medusa = use_medusa
+
+        torch.distributed.barrier(group=self.process_group)
+
+        filenames = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(filenames, device, dtype, process_group=self.process_group)
+        if config.quantize in ["gptq", "awq"]:
+            weights._set_gptq_params(model_id, revision)
+
+        model = FlashDbrxForCausalLM(config, weights)
+
+        torch.distributed.barrier(group=self.process_group)
+        super(FlashDbrx, self).__init__(
+            model=model,
+            tokenizer=tokenizer,
+            num_layers=len(model.model.layers),
+            num_kv_heads=model.model.num_key_value_heads,
+            head_size=model.model.head_size,
+            dtype=dtype,
+            device=device,
+            rank=rank,
+            world_size=world_size,
+        )
diff --git a/server/text_generation_server/models/flash_gemma.py b/server/text_generation_server/models/flash_gemma.py
new file mode 100644
index 00000000..7259b820
--- /dev/null
+++ b/server/text_generation_server/models/flash_gemma.py
@@ -0,0 +1,75 @@
+import torch
+import torch.distributed
+
+from opentelemetry import trace
+from typing import Optional
+from transformers.models.gemma import GemmaTokenizerFast
+
+from text_generation_server.models import FlashCausalLM
+from text_generation_server.models.custom_modeling.flash_gemma_modeling import (
+    FlashGemmaForCausalLM,
+    GemmaConfig,
+)
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+)
+
+tracer = trace.get_tracer(__name__)
+
+
+class FlashGemma(FlashCausalLM):
+    """FlashCausalLM subclass that wraps FlashGemmaForCausalLM (GPU only)."""
+
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        """Load tokenizer, config and weights for `model_id` (bfloat16 by default)."""
+        self.process_group, rank, world_size = initialize_torch_distributed()
+        if not torch.cuda.is_available():
+            raise NotImplementedError("FlashGemma is only available on GPU")
+        device = torch.device(f"cuda:{rank}")
+        dtype = dtype if dtype is not None else torch.bfloat16
+
+        tokenizer = GemmaTokenizerFast.from_pretrained(
+            model_id,
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+            use_fast=True,
+            from_slow=False,
+        )
+
+        config = GemmaConfig.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+        config.quantize = quantize
+        config.use_medusa = use_medusa
+
+        torch.distributed.barrier(group=self.process_group)
+        files = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(files, device, dtype, process_group=self.process_group)
+        if config.quantize in ["gptq", "awq"]:
+            weights._set_gptq_params(model_id, revision)
+        model = FlashGemmaForCausalLM(config, weights)
+        torch.distributed.barrier(group=self.process_group)
+        inner = model.model
+        super().__init__(
+            model=model,
+            tokenizer=tokenizer,
+            num_layers=len(inner.layers),
+            num_kv_heads=inner.num_key_value_heads,
+            head_size=inner.head_size,
+            dtype=dtype,
+            device=device,
+            rank=rank,
+            world_size=world_size,
+        )
diff --git a/server/text_generation_server/models/flash_llama.py b/server/text_generation_server/models/flash_llama.py
index d2ed0b15..56768942 100644
--- a/server/text_generation_server/models/flash_llama.py
+++ b/server/text_generation_server/models/flash_llama.py
@@ -26,6 +26,7 @@ class FlashLlama(FlashCausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
@@ -57,16 +58,17 @@ class FlashLlama(FlashCausalLM):
             model_id, revision=revision, trust_remote_code=trust_remote_code
         )
         config.quantize = quantize
+        config.use_medusa = use_medusa
 
         torch.distributed.barrier(group=self.process_group)
 
         filenames = weight_files(model_id, revision=revision, extension=".safetensors")
         weights = Weights(filenames, device, dtype, process_group=self.process_group)
         if config.quantize in ["gptq", "awq"]:
-            weights._set_gptq_params(model_id)
-
-        model = FlashLlamaForCausalLM(config, weights)
+            weights._set_gptq_params(model_id, revision)
 
+        prefix = ""
+        model = FlashLlamaForCausalLM(prefix, config, weights)
         torch.distributed.barrier(group=self.process_group)
         super(FlashLlama, self).__init__(
             model=model,
diff --git a/server/text_generation_server/models/flash_mistral.py b/server/text_generation_server/models/flash_mistral.py
index 919e4625..ace7ea8e 100644
--- a/server/text_generation_server/models/flash_mistral.py
+++ b/server/text_generation_server/models/flash_mistral.py
@@ -6,8 +6,7 @@ import numpy as np
 
 from dataclasses import dataclass
 from opentelemetry import trace
-from transformers import PreTrainedTokenizerBase
-from transformers.models.llama import LlamaTokenizerFast
+from transformers import PreTrainedTokenizerBase, AutoTokenizer, AutoConfig
 from typing import Optional, Tuple, Type
 
 from text_generation_server.pb import generate_pb2
@@ -15,12 +14,12 @@ from text_generation_server.models import FlashCausalLM
 from text_generation_server.models.flash_causal_lm import FlashCausalLMBatch, BLOCK_SIZE
 from text_generation_server.models.cache_manager import (
     get_cache_manager,
-    set_cache_manager,
 )
 from text_generation_server.models.custom_modeling.flash_mistral_modeling import (
     FlashMistralForCausalLM,
     MistralConfig,
 )
+from text_generation_server.utils.speculate import get_speculate
 from text_generation_server.utils import (
     initialize_torch_distributed,
     weight_files,
@@ -35,6 +34,21 @@ tracer = trace.get_tracer(__name__)
 SLIDING_WINDOW: Optional[int] = None
 SLIDING_WINDOW_BLOCKS: Optional[int] = None
 
+MEM_POOL = torch.cuda.graph_pool_handle()
+
+
+def set_sliding_window(sliding_window: int, sliding_window_blocks: int):
+    """Store both values in the module-level sliding-window globals."""
+    global SLIDING_WINDOW, SLIDING_WINDOW_BLOCKS
+    # get_sliding_windows() hands these back to the batch-building code
+    SLIDING_WINDOW, SLIDING_WINDOW_BLOCKS = sliding_window, sliding_window_blocks
+
+
+def get_sliding_windows() -> Tuple[Optional[int], Optional[int]]:
+    """Return the module-level (SLIDING_WINDOW, SLIDING_WINDOW_BLOCKS) pair."""
+    # Both globals are initialised to None, so callers must handle None values.
+    return SLIDING_WINDOW, SLIDING_WINDOW_BLOCKS
+
 
 # Adds windowing logic to FlashCausalLMBatch
 @dataclass
@@ -51,18 +65,19 @@ class FlashMistralBatch(FlashCausalLMBatch):
         dtype: torch.dtype,
         device: torch.device,
     ) -> "FlashCausalLMBatch":
-        global SLIDING_WINDOW
-        global SLIDING_WINDOW_BLOCKS
+        batch_tokenized_inputs = cls.batch_tokenized_inputs(pb.requests, tokenizer)
+        return cls.from_tokenized(pb, tokenizer, batch_tokenized_inputs, dtype, device)
 
-        batch_inputs = []
-        max_truncation = 0
-        for r in pb.requests:
-            batch_inputs.append(r.inputs)
-            max_truncation = max(max_truncation, r.truncate)
-
-        batch_tokenized_inputs = tokenizer(
-            batch_inputs, truncation=True, max_length=max_truncation
-        )["input_ids"]
+    @classmethod
+    def from_tokenized(
+        cls,
+        pb: generate_pb2.Batch,
+        tokenizer: PreTrainedTokenizerBase,
+        batch_tokenized_inputs,
+        dtype: torch.dtype,
+        device: torch.device,
+    ) -> "FlashCausalLMBatch":
+        sliding_window, sliding_window_blocks = get_sliding_windows()
 
         position_ids = []
         cu_seqlen_prefill = [0]
@@ -132,12 +147,13 @@ class FlashMistralBatch(FlashCausalLMBatch):
 
             # Paged attention
             # Remove one as the first token des not have a past
-            total_tokens = input_length + max_new_tokens - 1
+            speculative_length = get_speculate()
+            total_tokens = input_length + max_new_tokens - 1 + speculative_length
 
             # Needed blocks can not go over SLIDING_WINDOW_BLOCKS
-            needed_blocks = min(
-                math.ceil(total_tokens / BLOCK_SIZE), SLIDING_WINDOW_BLOCKS
-            )
+            needed_blocks = math.ceil(total_tokens / BLOCK_SIZE)
+            if sliding_window_blocks is not None:
+                needed_blocks = min(needed_blocks, sliding_window_blocks)
             blocks += needed_blocks
 
             needed_blocks_slots.append((needed_blocks, total_tokens))
@@ -151,12 +167,13 @@ class FlashMistralBatch(FlashCausalLMBatch):
             slot_indices.append(request_slot_indices)
 
             # Create tensor to slice into the kv tensor in prefill
-            request_prefill_cache_indices = torch.arange(
-                cumulative_length + max(0, input_length - SLIDING_WINDOW),
-                cumulative_length + input_length,
-                dtype=torch.int64,
-            )
-            prefill_cache_indices.append(request_prefill_cache_indices)
+            if sliding_window is not None:
+                request_prefill_cache_indices = torch.arange(
+                    cumulative_length + max(0, input_length - sliding_window),
+                    cumulative_length + input_length,
+                    dtype=torch.int64,
+                )
+                prefill_cache_indices.append(request_prefill_cache_indices)
 
             all_prefill_logprobs = all_prefill_logprobs and r.prefill_logprobs
             no_prefill_logprobs = no_prefill_logprobs and not r.prefill_logprobs
@@ -183,10 +200,12 @@ class FlashMistralBatch(FlashCausalLMBatch):
             cumulative_max_length += total_tokens
             max_seqlen = max(max_seqlen, input_length)
             max_blocks = max(max_blocks, needed_blocks)
-            max_length = max(max_length, input_length + max_new_tokens)
+            max_length = max(
+                max_length, input_length + max_new_tokens + speculative_length
+            )
 
         next_token_chooser = HeterogeneousNextTokenChooser.from_pb(
-            next_token_chooser_parameters, dtype, device
+            next_token_chooser_parameters, dtype, device, tokenizer
         )
         start_slots = torch.tensor(start_slots, dtype=torch.int64)
 
@@ -206,12 +225,14 @@ class FlashMistralBatch(FlashCausalLMBatch):
             input_ids = np.concatenate(all_input_ids, dtype=np.int64)
             position_ids = torch.cat(position_ids)
             slot_indices = torch.cat(slot_indices)
-            prefill_cache_indices = torch.cat(prefill_cache_indices)
+            if sliding_window is not None:
+                prefill_cache_indices = torch.cat(prefill_cache_indices)
         else:
             input_ids = all_input_ids[0]
             position_ids = position_ids[0]
             slot_indices = slot_indices[0]
-            prefill_cache_indices = prefill_cache_indices[0]
+            if sliding_window is not None:
+                prefill_cache_indices = prefill_cache_indices[0]
 
         cu_seqlen_prefill = torch.tensor(
             cu_seqlen_prefill, device=device, dtype=torch.int32
@@ -219,7 +240,9 @@ class FlashMistralBatch(FlashCausalLMBatch):
 
         position_ids = position_ids.to(device)
         slot_indices = slot_indices.to(device)
-        prefill_cache_indices = prefill_cache_indices.to(device)
+        prefill_cache_indices = (
+            prefill_cache_indices.to(device) if sliding_window is not None else None
+        )
         input_ids = torch.tensor(input_ids, dtype=torch.int64, device=device)
         input_lengths_tensor = torch.tensor(
             input_lengths, dtype=torch.int32, device=device
@@ -272,29 +295,31 @@ class FlashMistralBatch(FlashCausalLMBatch):
             blocks=blocks,
             max_blocks=max_blocks,
             prefill_cache_indices=prefill_cache_indices,
+            speculative_ids=None,
         )
 
 
-class FlashMistral(FlashCausalLM):
+class BaseFlashMistral(FlashCausalLM):
     def __init__(
         self,
+        model_cls,
         model_id: str,
+        config_cls=AutoConfig,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
+        tokenizer_class=AutoTokenizer,
     ):
-        global SLIDING_WINDOW
-        global SLIDING_WINDOW_BLOCKS
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16 if dtype is None else dtype
         else:
-            raise NotImplementedError("FlashLlama is only available on GPU")
+            raise NotImplementedError("FlashMistral is only available on GPU")
 
-        tokenizer = LlamaTokenizerFast.from_pretrained(
+        tokenizer = tokenizer_class.from_pretrained(
             model_id,
             revision=revision,
             padding_side="left",
@@ -302,31 +327,40 @@ class FlashMistral(FlashCausalLM):
             trust_remote_code=trust_remote_code,
         )
 
-        config = MistralConfig.from_pretrained(
+        config = config_cls.from_pretrained(
             model_id, revision=revision, trust_remote_code=trust_remote_code
         )
         config.quantize = quantize
+        config.use_medusa = use_medusa
 
         # Set context windows
-        SLIDING_WINDOW = config.sliding_window
-        SLIDING_WINDOW_BLOCKS = math.ceil(config.sliding_window / BLOCK_SIZE)
+        if getattr(config, "sliding_window", None) is not None:
+            set_sliding_window(
+                config.sliding_window, math.ceil(config.sliding_window / BLOCK_SIZE)
+            )
+        else:
+            config.sliding_window = None
 
         torch.distributed.barrier(group=self.process_group)
 
         filenames = weight_files(model_id, revision=revision, extension=".safetensors")
         weights = Weights(filenames, device, dtype, process_group=self.process_group)
         if config.quantize in ["gptq", "awq"]:
-            weights._set_gptq_params(model_id)
+            weights._set_gptq_params(model_id, revision)
 
-        model = FlashMistralForCausalLM(config, weights)
+        prefix = ""
+        model = model_cls(prefix, config, weights)
+
+        self.cuda_graphs = {}
 
         torch.distributed.barrier(group=self.process_group)
-        super(FlashMistral, self).__init__(
+        num_layers, num_kv_heads, head_size = self.get_layer_config(model)
+        super().__init__(
             model=model,
             tokenizer=tokenizer,
-            num_layers=len(model.model.layers),
-            num_kv_heads=model.model.num_key_value_heads,
-            head_size=model.model.head_size,
+            num_layers=num_layers,
+            num_kv_heads=num_kv_heads,
+            head_size=head_size,
             dtype=dtype,
             device=device,
             rank=rank,
@@ -334,24 +368,207 @@ class FlashMistral(FlashCausalLM):
             sliding_window=config.sliding_window,
         )
 
+    def get_layer_config(self, model) -> Tuple[int, int, int]:
+        return (
+            len(model.model.layers),
+            model.model.num_key_value_heads,
+            model.model.head_size,
+        )
+
+    def max_past(self) -> int:
+        return self.model.max_past
+
     @property
     def batch_type(self) -> Type[FlashMistralBatch]:
         return FlashMistralBatch
 
-    def forward(self, batch: FlashMistralBatch) -> Tuple[torch.Tensor, torch.Tensor]:
-        # Model Forward
-        logits = self.model.forward(
-            input_ids=batch.input_ids,
-            position_ids=batch.position_ids,
-            cu_seqlen_prefill=batch.cu_seqlen_prefill,
-            kv_cache=get_cache_manager().kv_cache,
-            block_tables=batch.block_tables_tensor,
-            slots=batch.slots[batch.slot_indices],
-            input_lengths=batch.input_lengths_tensor,
-            max_s=batch.max_seqlen,
-            prefill_cache_indices=batch.prefill_cache_indices,
-            lm_head_indices=batch.prefill_head_indices,
+    def cuda_graph_warmup(self, bs: int, max_s: int, max_bt: int):
+        input_ids = torch.zeros(bs, dtype=torch.int64, device=self.device)
+        position_ids = torch.zeros(bs, dtype=torch.int32, device=self.device)
+        slots = torch.arange(bs, dtype=torch.int64, device=self.device)
+        input_lengths = torch.ones(bs, dtype=torch.int32, device=self.device) * max_s
+        block_tables = (
+            torch.arange(max_bt, dtype=torch.int32, device=self.device)
+            .repeat(bs)
+            .reshape((bs, max_bt))
+        )
+        kv_cache = get_cache_manager().kv_cache
+
+        self.cuda_graphs[bs] = {
+            "input_ids": input_ids,
+            "position_ids": position_ids,
+            "kv_cache": kv_cache,
+            "block_tables": block_tables,
+            "slots": slots,
+            "input_lengths": input_lengths,
+        }
+        graph = torch.cuda.CUDAGraph()
+        self.cuda_graphs[bs]["graph"] = graph
+
+        torch.cuda.synchronize()
+        # Run once outside the graph capture to warm up
+        self.model.forward(
+            input_ids=input_ids,
+            position_ids=position_ids,
+            cu_seqlen_prefill=None,
+            kv_cache=kv_cache,
+            block_tables=block_tables,
+            slots=slots,
+            input_lengths=input_lengths,
+            max_s=max_s,
+            prefill_cache_indices=None,
+            lm_head_indices=None,
+        )
+        torch.cuda.synchronize()
+
+        with torch.cuda.graph(graph, pool=MEM_POOL):
+            logits, speculative_logits = self.model.forward(
+                input_ids=input_ids,
+                position_ids=position_ids,
+                cu_seqlen_prefill=None,
+                kv_cache=kv_cache,
+                block_tables=block_tables,
+                slots=slots,
+                input_lengths=input_lengths,
+                max_s=max_s,
+                prefill_cache_indices=None,
+                lm_head_indices=None,
+            )
+            self.cuda_graphs[bs]["logits"] = logits
+            self.cuda_graphs[bs]["speculative_logits"] = speculative_logits
+        torch.cuda.synchronize()
+
+    def forward(
+        self, batch: FlashMistralBatch
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+        # Model Forward
+        if batch.speculative_ids is not None:
+            input_ids = batch.input_ids
+            position_ids = batch.position_ids
+            cu_seqlen_prefill = batch.cu_seqlen_prefill
+            kv_cache = get_cache_manager().kv_cache
+            block_tables = batch.block_tables_tensor
+            slots = batch.slots[batch.slot_indices]
+            input_lengths = batch.input_lengths_tensor
+            max_s = batch.max_seqlen
+            lm_head_indices = batch.prefill_head_indices
+
+            speculative_ids = batch.speculative_ids
+
+            B, speculative_length = speculative_ids.shape
+            new_length = speculative_length + 1
+            new_input_ids = torch.cat(
+                [input_ids.unsqueeze(-1), speculative_ids], dim=1
+            ).reshape(-1)
+            arange = torch.arange(new_length, device=position_ids.device).unsqueeze(0)
+            arange_int = arange.to(dtype=torch.int32)
+            new_position_ids = (
+                position_ids.unsqueeze(-1).expand(B, new_length) + arange
+            ).view(-1)
+            slots = (slots.unsqueeze(-1).expand(B, new_length) + arange_int).view(-1)
+            input_lengths = (
+                input_lengths.unsqueeze(-1).expand(B, new_length) + arange_int
+            ).view(-1)
+
+            # Copy each sequence's block table for every speculative position
+            block_tables = (
+                block_tables.unsqueeze(1)
+                .expand(B, new_length, -1)
+                .reshape(B * new_length, -1)
+                .contiguous()
+            )
+            max_s = max_s + speculative_length
+
+            input_ids = new_input_ids
+            position_ids = new_position_ids
+        else:
+            input_ids = batch.input_ids
+            position_ids = batch.position_ids
+            cu_seqlen_prefill = batch.cu_seqlen_prefill
+            kv_cache = get_cache_manager().kv_cache
+            block_tables = batch.block_tables_tensor
+            slots = batch.slots[batch.slot_indices]
+            input_lengths = batch.input_lengths_tensor
+            max_s = batch.max_seqlen
+            lm_head_indices = batch.prefill_head_indices
+
+        if cu_seqlen_prefill is None and self.max_past() is not None:
+            # In decode, not prefill, we're actually overwriting the KV-cache
+            # in a circular buffer mode.
+            # This makes sure the max_s for the decode pass is correct.
+            max_s = min(self.max_past(), max_s)
+
+        bs = input_ids.shape[0]
+        padded_bs = bs
+        if bs == 3:
+            padded_bs = 4
+        elif 3 < bs <= 8:
+            padded_bs = 8
+        elif bs > 8:
+            padded_bs = (bs + 7) // 8 * 8
+
+        # Try to find an associated cuda graph
+        cuda_graph = self.cuda_graphs.get(padded_bs, None)
+
+        if cu_seqlen_prefill is not None or cuda_graph is None:
+            logits, speculative_logits = self.model.forward(
+                input_ids=input_ids,
+                position_ids=position_ids,
+                cu_seqlen_prefill=cu_seqlen_prefill,
+                kv_cache=kv_cache,
+                block_tables=block_tables,
+                slots=slots,
+                input_lengths=input_lengths,
+                max_s=max_s,
+                prefill_cache_indices=batch.prefill_cache_indices,
+                lm_head_indices=lm_head_indices,
+            )
+            if batch.prefill_cache_indices is not None:
+                batch.prefill_cache_indices = None
+            return logits, speculative_logits
+
+        # Copy inputs to the static inputs of the cuda graph
+        # Static inputs are potentially padded
+        cuda_graph["input_ids"][: input_ids.shape[0]] = input_ids
+        cuda_graph["position_ids"][: position_ids.shape[0]] = position_ids
+        cuda_graph["block_tables"][
+            : block_tables.shape[0], : block_tables.shape[1]
+        ] = block_tables
+        cuda_graph["slots"].fill_(-1)
+        cuda_graph["slots"][: slots.shape[0]] = slots
+        cuda_graph["input_lengths"].zero_()
+        cuda_graph["input_lengths"][: input_lengths.shape[0]] = input_lengths
+
+        # Replay the graph
+        cuda_graph["graph"].replay()
+
+        # Slice output to the correct shape
+        speculative_logits = (
+            cuda_graph["speculative_logits"][:bs]
+            if cuda_graph["speculative_logits"] is not None
+            else None
+        )
+        logits = cuda_graph["logits"][:bs]
+        return logits, speculative_logits
+
+
+class FlashMistral(BaseFlashMistral):
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        super(FlashMistral, self).__init__(
+            config_cls=MistralConfig,
+            model_cls=FlashMistralForCausalLM,
+            model_id=model_id,
+            revision=revision,
+            quantize=quantize,
+            use_medusa=use_medusa,
+            dtype=dtype,
+            trust_remote_code=trust_remote_code,
         )
-        if batch.prefill_cache_indices is not None:
-            batch.prefill_cache_indices = None
-        return logits
diff --git a/server/text_generation_server/models/flash_mixtral.py b/server/text_generation_server/models/flash_mixtral.py
new file mode 100644
index 00000000..2ee35e82
--- /dev/null
+++ b/server/text_generation_server/models/flash_mixtral.py
@@ -0,0 +1,31 @@
+import torch
+
+from typing import Optional
+
+from text_generation_server.models.flash_mistral import BaseFlashMistral
+from text_generation_server.models.custom_modeling.flash_mixtral_modeling import (
+    MixtralConfig,
+    FlashMixtralForCausalLM,
+)
+
+
+class FlashMixtral(BaseFlashMistral):
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        super(FlashMixtral, self).__init__(
+            config_cls=MixtralConfig,
+            model_cls=FlashMixtralForCausalLM,
+            model_id=model_id,
+            revision=revision,
+            quantize=quantize,
+            use_medusa=use_medusa,
+            dtype=dtype,
+            trust_remote_code=trust_remote_code,
+        )
diff --git a/server/text_generation_server/models/flash_neox.py b/server/text_generation_server/models/flash_neox.py
index 58f345a9..5a351bd7 100644
--- a/server/text_generation_server/models/flash_neox.py
+++ b/server/text_generation_server/models/flash_neox.py
@@ -24,6 +24,7 @@ class FlashNeoXSharded(FlashCausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
@@ -46,6 +47,7 @@ class FlashNeoXSharded(FlashCausalLM):
             model_id, revision=revision, trust_remote_code=trust_remote_code
         )
         config.quantize = quantize
+        config.use_medusa = use_medusa
 
         torch.distributed.barrier(group=self.process_group)
         filenames = weight_files(model_id, revision=revision, extension=".safetensors")
@@ -53,7 +55,7 @@ class FlashNeoXSharded(FlashCausalLM):
             filenames, device=device, dtype=dtype, process_group=self.process_group
         )
         if config.quantize == "gptq":
-            weights._set_gptq_params(model_id)
+            weights._set_gptq_params(model_id, revision)
 
         model = FlashGPTNeoXForCausalLM(config, weights)
 
diff --git a/server/text_generation_server/models/flash_phi.py b/server/text_generation_server/models/flash_phi.py
new file mode 100644
index 00000000..cb55f9e6
--- /dev/null
+++ b/server/text_generation_server/models/flash_phi.py
@@ -0,0 +1,103 @@
+import torch
+import torch.distributed
+
+from opentelemetry import trace
+from transformers import AutoConfig, AutoTokenizer
+from typing import Optional
+
+from text_generation_server.models import FlashCausalLM
+from text_generation_server.models.custom_modeling.flash_phi_modeling import (
+    FlashPhiForCausalLM,
+    PhiConfig,
+)
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+)
+
+tracer = trace.get_tracer(__name__)
+
+
+class FlashPhi(FlashCausalLM):
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        self.process_group, rank, world_size = initialize_torch_distributed()
+        if torch.cuda.is_available():
+            device = torch.device(f"cuda:{rank}")
+            dtype = torch.float16 if dtype is None else dtype
+        else:
+            raise NotImplementedError("FlashPhi is only available on GPU")
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id,
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+        )
+
+        config = PhiConfig.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+        config.quantize = quantize
+        config.use_medusa = use_medusa
+
+        torch.distributed.barrier(group=self.process_group)
+
+        filenames = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(filenames, device, dtype, process_group=self.process_group)
+        if config.quantize in ["gptq", "awq"]:
+            weights._set_gptq_params(model_id, revision)
+
+        model = FlashPhiForCausalLM(config, weights)
+        if use_medusa:
+            from text_generation_server.utils.medusa import MedusaModel
+            from huggingface_hub import hf_hub_download
+            import json
+            import os
+            from pathlib import Path
+
+            is_local_model = (
+                Path(use_medusa).exists() and Path(use_medusa).is_dir()
+            ) or os.getenv("WEIGHTS_CACHE_OVERRIDE", None) is not None
+
+            if not is_local_model:
+                medusa_config = hf_hub_download(
+                    use_medusa, revision=revision, filename="config.json"
+                )
+                medusa_head = hf_hub_download(
+                    use_medusa, revision=revision, filename="medusa_lm_head.pt"
+                )
+            else:
+                medusa_config = str(Path(use_medusa) / "config.json")
+                medusa_head = str(Path(use_medusa) / "medusa_lm_head.pt")
+
+            with open(medusa_config, "r") as f:
+                config = json.load(f)
+            medusa_sf = medusa_head[: -len(".pt")] + ".safetensors"
+            weights = Weights(
+                [medusa_sf], device, dtype, process_group=self.process_group
+            )
+            lm_head = model.lm_head
+            model.lm_head = MedusaModel(config, weights, lm_head)
+
+        torch.distributed.barrier(group=self.process_group)
+        super(FlashPhi, self).__init__(
+            model=model,
+            tokenizer=tokenizer,
+            num_layers=len(model.model.layers),
+            num_kv_heads=model.model.num_key_value_heads,
+            head_size=model.model.head_size,
+            dtype=dtype,
+            device=device,
+            rank=rank,
+            world_size=world_size,
+        )
diff --git a/server/text_generation_server/models/flash_qwen2.py b/server/text_generation_server/models/flash_qwen2.py
new file mode 100644
index 00000000..c3c63516
--- /dev/null
+++ b/server/text_generation_server/models/flash_qwen2.py
@@ -0,0 +1,88 @@
+import math
+
+import torch
+import torch.distributed
+
+from opentelemetry import trace
+from transformers.models.qwen2 import Qwen2Tokenizer
+from typing import Optional
+
+from text_generation_server.models.cache_manager import BLOCK_SIZE
+from text_generation_server.models.flash_mistral import (
+    BaseFlashMistral,
+    set_sliding_window,
+)
+from text_generation_server.models.custom_modeling.flash_qwen2_modeling import (
+    Qwen2ForCausalLM,
+)
+from transformers.models.qwen2 import Qwen2Config
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+)
+
+tracer = trace.get_tracer(__name__)
+
+
+class FlashQwen2(BaseFlashMistral):
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        self.process_group, rank, world_size = initialize_torch_distributed()
+        if torch.cuda.is_available():
+            device = torch.device(f"cuda:{rank}")
+            dtype = torch.float16 if dtype is None else dtype
+        else:
+            raise NotImplementedError("FlashQwen2 is only available on GPU")
+
+        tokenizer = Qwen2Tokenizer.from_pretrained(
+            model_id,
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+        )
+
+        config = Qwen2Config.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+        config.quantize = quantize
+        config.use_medusa = use_medusa
+
+        # Set context windows
+        if config.sliding_window is not None:
+            set_sliding_window(
+                config.sliding_window, math.ceil(config.sliding_window / BLOCK_SIZE)
+            )
+
+        torch.distributed.barrier(group=self.process_group)
+
+        filenames = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(filenames, device, dtype, process_group=self.process_group)
+        if config.quantize in ["gptq", "awq"]:
+            weights._set_gptq_params(model_id, revision)
+
+        model = Qwen2ForCausalLM(config, weights)
+
+        self.cuda_graphs = {}
+
+        torch.distributed.barrier(group=self.process_group)
+        super(BaseFlashMistral, self).__init__(
+            model=model,
+            tokenizer=tokenizer,
+            num_layers=len(model.model.layers),
+            num_kv_heads=model.model.num_key_value_heads,
+            head_size=model.model.head_size,
+            dtype=dtype,
+            device=device,
+            rank=rank,
+            world_size=world_size,
+            sliding_window=config.sliding_window,
+        )
diff --git a/server/text_generation_server/models/flash_rw.py b/server/text_generation_server/models/flash_rw.py
index 195b3883..fc1e26bd 100644
--- a/server/text_generation_server/models/flash_rw.py
+++ b/server/text_generation_server/models/flash_rw.py
@@ -25,6 +25,7 @@ class FlashRWSharded(FlashCausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
@@ -61,8 +62,9 @@ class FlashRWSharded(FlashCausalLM):
         )
 
         config.quantize = quantize
+        config.use_medusa = use_medusa
         if config.quantize == "gptq":
-            weights._set_gptq_params(model_id)
+            weights._set_gptq_params(model_id, revision)
 
         model = FlashRWForCausalLM(config, weights)
 
diff --git a/server/text_generation_server/models/flash_santacoder.py b/server/text_generation_server/models/flash_santacoder.py
index 29505902..034949f9 100644
--- a/server/text_generation_server/models/flash_santacoder.py
+++ b/server/text_generation_server/models/flash_santacoder.py
@@ -27,6 +27,7 @@ class FlashSantacoderSharded(FlashCausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
@@ -51,6 +52,7 @@ class FlashSantacoderSharded(FlashCausalLM):
             trust_remote_code=True,
         )
         config.quantize = quantize
+        config.use_medusa = use_medusa
         config.transpose = config.architectures[0].startswith("GPT2")
 
         torch.distributed.barrier(group=self.process_group)
@@ -63,7 +65,7 @@ class FlashSantacoderSharded(FlashCausalLM):
             aliases={"transformer.wte.weight": ["lm_head.weight"]},
         )
         if config.quantize == "gptq":
-            weights._set_gptq_params(model_id)
+            weights._set_gptq_params(model_id, revision)
 
         model = FlashSantacoderForCausalLM(config, weights)
 
diff --git a/server/text_generation_server/models/flash_starcoder2.py b/server/text_generation_server/models/flash_starcoder2.py
new file mode 100644
index 00000000..68e726d8
--- /dev/null
+++ b/server/text_generation_server/models/flash_starcoder2.py
@@ -0,0 +1,86 @@
+import math
+
+import torch
+
+from typing import Optional
+
+from transformers.models.gpt2 import GPT2TokenizerFast
+
+from text_generation_server.models.cache_manager import BLOCK_SIZE
+from text_generation_server.models.flash_mistral import (
+    BaseFlashMistral,
+    set_sliding_window,
+)
+from text_generation_server.models.custom_modeling.flash_starcoder2_modeling import (
+    Starcoder2Config,
+    FlashStarcoder2ForCausalLM,
+)
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+)
+
+
+# Starcoder2 has the same base as Mistral
+class FlashStarcoder2(BaseFlashMistral):
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        self.process_group, rank, world_size = initialize_torch_distributed()
+        if torch.cuda.is_available():
+            device = torch.device(f"cuda:{rank}")
+            dtype = torch.float16 if dtype is None else dtype
+        else:
+            raise NotImplementedError("FlashStarcoder2 is only available on GPU")
+
+        tokenizer = GPT2TokenizerFast.from_pretrained(
+            model_id,
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+        )
+
+        config = Starcoder2Config.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+        config.quantize = quantize
+        config.use_medusa = use_medusa
+
+        # Set context windows
+        if config.sliding_window is not None:
+            set_sliding_window(
+                config.sliding_window, math.ceil(config.sliding_window / BLOCK_SIZE)
+            )
+
+        torch.distributed.barrier(group=self.process_group)
+
+        filenames = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(filenames, device, dtype, process_group=self.process_group)
+        if config.quantize in ["gptq", "awq"]:
+            weights._set_gptq_params(model_id, revision)
+
+        model = FlashStarcoder2ForCausalLM(config, weights)
+
+        self.cuda_graphs = {}
+
+        torch.distributed.barrier(group=self.process_group)
+        super(BaseFlashMistral, self).__init__(
+            model=model,
+            tokenizer=tokenizer,
+            num_layers=len(model.model.layers),
+            num_kv_heads=model.model.num_key_value_heads,
+            head_size=model.model.head_size,
+            dtype=dtype,
+            device=device,
+            rank=rank,
+            world_size=world_size,
+            sliding_window=config.sliding_window,
+        )
diff --git a/server/text_generation_server/models/galactica.py b/server/text_generation_server/models/galactica.py
index b296c96e..a46f86be 100644
--- a/server/text_generation_server/models/galactica.py
+++ b/server/text_generation_server/models/galactica.py
@@ -92,7 +92,9 @@ class GalacticaCausalLMBatch(CausalLMBatch):
             requests_idx_mapping[r.id] = i
             # Add escape_custom_split_sequence to the CausalLMBatch logic
             inputs.append(escape_custom_split_sequence(r.inputs))
-            next_token_choosers.append(NextTokenChooser.from_pb(r.parameters, device))
+            next_token_choosers.append(
+                NextTokenChooser.from_pb(r.parameters, device, tokenizer)
+            )
             stopping_criteria = StoppingCriteria.from_pb(
                 r.stopping_parameters, tokenizer
             )
@@ -165,6 +167,7 @@ class GalacticaSharded(CausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
@@ -192,6 +195,7 @@ class GalacticaSharded(CausalLM):
         )
         config.quantize = quantize
         tokenizer.pad_token_id = config.pad_token_id
+        config.use_medusa = use_medusa
 
         torch.distributed.barrier(group=self.process_group)
         filenames = weight_files(model_id, revision=revision, extension=".safetensors")
@@ -199,7 +203,7 @@ class GalacticaSharded(CausalLM):
             filenames, device=device, dtype=dtype, process_group=self.process_group
         )
         if config.quantize == "gptq":
-            weights._set_gptq_params(model_id)
+            weights._set_gptq_params(model_id, revision)
 
         model = OPTForCausalLM(config, weights)
 
@@ -227,10 +231,10 @@ class GalacticaSharded(CausalLM):
     def forward(
         self, input_ids, attention_mask, position_ids, past_key_values: Optional = None
     ):
-        outputs = self.model.forward(
+        outputs, speculative_logits = self.model.forward(
             input_ids=input_ids,
             attention_mask=attention_mask,
             past_key_values=past_key_values,
             use_cache=True,
         )
-        return outputs.logits, outputs.past_key_values
+        return outputs.logits, speculative_logits, outputs.past_key_values
diff --git a/server/text_generation_server/models/globals.py b/server/text_generation_server/models/globals.py
new file mode 100644
index 00000000..6f554049
--- /dev/null
+++ b/server/text_generation_server/models/globals.py
@@ -0,0 +1,21 @@
+import torch
+import os
+
+# Graph memory pool handle (see `torch.cuda.graph_pool_handle`). It is only
+# requested when CUDA is available so that importing this module does not
+# fail on hosts without a usable CUDA device; such hosts get `None`.
+MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None
+
+# Batch sizes parsed from the comma separated `CUDA_GRAPHS` environment
+# variable; stays `None` when the variable is not set.
+# This is overridden by the cli
+cuda_graphs = os.getenv("CUDA_GRAPHS")
+if cuda_graphs is not None:
+    try:
+        cuda_graphs = [int(item) for item in cuda_graphs.split(",")]
+    except Exception as e:
+        # Chain the original error so the traceback keeps the parsing failure
+        raise RuntimeError(
+            f"Could not parse cuda graphs {cuda_graphs}, expected comma separated list for batch sizes to run on: {e}"
+        ) from e
+CUDA_GRAPHS = cuda_graphs
diff --git a/server/text_generation_server/models/gpt_neox.py b/server/text_generation_server/models/gpt_neox.py
index d4c64dfe..1c4cfe7d 100644
--- a/server/text_generation_server/models/gpt_neox.py
+++ b/server/text_generation_server/models/gpt_neox.py
@@ -24,6 +24,7 @@ class GPTNeoxSharded(CausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
@@ -50,6 +51,7 @@ class GPTNeoxSharded(CausalLM):
             trust_remote_code=trust_remote_code,
         )
         config.quantize = quantize
+        config.use_medusa = use_medusa
 
         torch.distributed.barrier(group=self.process_group)
         filenames = weight_files(model_id, revision=revision, extension=".safetensors")
@@ -57,7 +59,7 @@ class GPTNeoxSharded(CausalLM):
             filenames, device=device, dtype=dtype, process_group=self.process_group
         )
         if config.quantize == "gptq":
-            weights._set_gptq_params(model_id)
+            weights._set_gptq_params(model_id, revision)
 
         model = GPTNeoxForCausalLM(config, weights)
 
@@ -75,7 +77,7 @@ class GPTNeoxSharded(CausalLM):
     def forward(
         self, input_ids, attention_mask, position_ids, past_key_values: Optional = None
     ):
-        outputs = self.model.forward(
+        outputs, speculative_logits = self.model.forward(
             input_ids=input_ids,
             attention_mask=attention_mask,
             position_ids=position_ids,
@@ -84,4 +86,4 @@ class GPTNeoxSharded(CausalLM):
         )
 
         logits = outputs.logits
-        return logits, outputs.past_key_values
+        return logits, speculative_logits, outputs.past_key_values
diff --git a/server/text_generation_server/models/idefics.py b/server/text_generation_server/models/idefics.py
index fa23d1f9..30bf4aa6 100644
--- a/server/text_generation_server/models/idefics.py
+++ b/server/text_generation_server/models/idefics.py
@@ -31,6 +31,7 @@ class IDEFICSSharded(IdeficsCausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
@@ -39,7 +40,7 @@ class IDEFICSSharded(IdeficsCausalLM):
             device = torch.device(f"cuda:{rank}")
             # 9b seems to work correctly enough in float16, but 80b seems
             # to be really saturating for f16.
-            dtype = torch.bfloat16 if dtype is None else dtype
+            dtype = torch.float16 if dtype is None else dtype
         else:
             device = torch.device("cpu")
             dtype = torch.float32 if dtype is None else dtype
@@ -51,6 +52,7 @@ class IDEFICSSharded(IdeficsCausalLM):
             trust_remote_code=trust_remote_code,
         )
         config.quantize = quantize
+        config.use_medusa = use_medusa
         config.vision_config.quantize = quantize
 
         tokenizer = LlamaTokenizerFast.from_pretrained(
diff --git a/server/text_generation_server/models/idefics_causal_lm.py b/server/text_generation_server/models/idefics_causal_lm.py
index dcad1fa9..e78a9655 100644
--- a/server/text_generation_server/models/idefics_causal_lm.py
+++ b/server/text_generation_server/models/idefics_causal_lm.py
@@ -1,17 +1,12 @@
 import torch
-import inspect
-import re
-from io import BytesIO
-import base64
-from PIL import Image
-import re
+import torch
+import time
 
 from dataclasses import dataclass
 from opentelemetry import trace
 from transformers import (
     AutoProcessor,
     AutoTokenizer,
-    AutoModelForCausalLM,
     PreTrainedTokenizerBase,
     ProcessorMixin,
 )
@@ -20,35 +15,19 @@ from typing import Optional, Tuple, List, Type, Dict
 from text_generation_server.models import Model
 from text_generation_server.models.types import (
     Batch,
-    PrefillTokens,
+    Tokens,
     Generation,
     GeneratedText,
 )
 from text_generation_server.pb import generate_pb2
 from text_generation_server.utils import NextTokenChooser, StoppingCriteria, Sampling
+from text_generation_server.models.vlm_causal_lm import split
 
 import re
 
 IMAGES = re.compile(r"!\[[^\]]*\]\((.*?)\s*(\"(?:.*[^\"])\")?\s*\)")
 
 
-def split(string):
-    parts = []
-    cursor = 0
-    for pattern in IMAGES.finditer(string):
-        start = pattern.start()
-        if start != cursor:
-            parts.append(string[cursor:start])
-
-        parts.append(pattern.group(1))
-        cursor = pattern.end()
-
-    if cursor != len(string):
-        parts.append(string[cursor:])
-
-    return parts
-
-
 tracer = trace.get_tracer(__name__)
 
 
@@ -99,10 +78,21 @@ class IdeficsCausalLMBatch(Batch):
 
     @classmethod
     def from_pb(
+        cls,
+        pb: generate_pb2.Batch,
+        tokenizer: PreTrainedTokenizerBase,
+        dtype: torch.dtype,
+        device: torch.device,
+    ) -> "IdeficsCausalLMBatch":
+        raise NotImplementedError
+
+    @classmethod
+    def from_pb_processor(
         cls,
         pb: generate_pb2.Batch,
         tokenizer: PreTrainedTokenizerBase,
         processor: ProcessorMixin,  # Hack
+        config,
         dtype: torch.dtype,
         device: torch.device,
     ) -> "IdeficsCausalLMBatch":
@@ -120,7 +110,9 @@ class IdeficsCausalLMBatch(Batch):
         for i, r in enumerate(pb.requests):
             requests_idx_mapping[r.id] = i
             inputs.append(r.inputs)
-            next_token_choosers.append(NextTokenChooser.from_pb(r.parameters, device))
+            next_token_choosers.append(
+                NextTokenChooser.from_pb(r.parameters, device, tokenizer)
+            )
             stopping_criteria = StoppingCriteria.from_pb(
                 r.stopping_parameters, tokenizer
             )
@@ -131,10 +123,14 @@ class IdeficsCausalLMBatch(Batch):
                 padding_right_offset, stopping_criteria.max_new_tokens
             )
 
+        # TODO Check impact on idefics
         prompts = []
         for inp in inputs:
             # Each input is encoded into a list, where each element of this input list is either a string or a URL
-            prompts.append(split(inp))
+            prompt = []
+            for chunk in split(inp):
+                prompt.append(chunk["content"])
+            prompts.append(prompt)
 
         # The processor replaces the call to tokenizer, and
         # a/ takes care of fetching images from the URL
@@ -145,7 +141,8 @@ class IdeficsCausalLMBatch(Batch):
             padding=True,
             truncation=True,
             max_length=max_truncation,
-            add_end_of_utterance_token=False,  # Already taken care of inside the prompts, so bypassing the processor's handling of this token
+            # TODO Check impact on idefics
+            # add_end_of_utterance_token=False,  # Already taken care of inside the prompts, so bypassing the processor's handling of this token
         ).to(device)
         for _ in pb.requests:
             input_len = tokenized_inputs["input_ids"].shape[1]
@@ -160,7 +157,7 @@ class IdeficsCausalLMBatch(Batch):
         max_input_length = input_lengths.max()
 
         input_ids = tokenized_inputs["input_ids"]
-        pixel_values = tokenized_inputs["pixel_values"]
+        pixel_values = tokenized_inputs.get("pixel_values", None)
         image_hidden_states = None
         # Allocate maximum attention_mask
         attention_mask = input_ids.new_zeros(
@@ -169,16 +166,19 @@ class IdeficsCausalLMBatch(Batch):
         # Copy tokenizer attention_mask into fully allocated attention_mask
         attention_mask[:, :max_input_length] = tokenized_inputs["attention_mask"]
         # Do the same for image_attention_mask
-        image_attention_mask = input_ids.new_zeros(
-            (
-                pb.size,
-                max_input_length + padding_right_offset,
-                tokenized_inputs["pixel_values"].size(1),
+        if pixel_values is None:
+            image_attention_mask = None
+        else:
+            image_attention_mask = input_ids.new_zeros(
+                (
+                    pb.size,
+                    max_input_length + padding_right_offset,
+                    pixel_values.size(1),
+                )
             )
-        )
-        image_attention_mask[:, :max_input_length, :] = tokenized_inputs[
-            "image_attention_mask"
-        ]
+            image_attention_mask[:, :max_input_length, :] = tokenized_inputs[
+                "image_attention_mask"
+            ]
 
         position_ids = tokenized_inputs["attention_mask"].long().cumsum(-1) - 1
         position_ids.masked_fill_(tokenized_inputs["attention_mask"] == 0, 1)
@@ -407,9 +407,9 @@ class IdeficsCausalLMBatch(Batch):
                 pixel_values = batch.pixel_values.new_zeros(
                     (total_batch_size, max_num_images, 3, 224, 224)
                 )
-            pixel_values[
-                start_index:end_index, :curr_batch_max_num_images
-            ] = batch.pixel_values
+            pixel_values[start_index:end_index, :curr_batch_max_num_images] = (
+                batch.pixel_values
+            )
 
             if image_attention_mask is None:
                 image_attention_mask = batch.image_attention_mask.new_zeros(
@@ -506,14 +506,14 @@ class IdeficsCausalLMBatch(Batch):
                 # We slice the keys to remove the padding from previous batches
                 past_seq_len = batch.max_input_length - 1
                 if batch.keys_head_dim_last:
-                    padded_past_keys[
-                        start_index:end_index, :, -past_seq_len:, :
-                    ] = past_keys[:, :, -past_seq_len:, :]
+                    padded_past_keys[start_index:end_index, :, -past_seq_len:, :] = (
+                        past_keys[:, :, -past_seq_len:, :]
+                    )
                 else:
                     # BLOOM case
-                    padded_past_keys[
-                        start_index:end_index, :, :, -past_seq_len:
-                    ] = past_keys[:, :, :, -past_seq_len:]
+                    padded_past_keys[start_index:end_index, :, :, -past_seq_len:] = (
+                        past_keys[:, :, :, -past_seq_len:]
+                    )
                 del past_keys
 
                 start_index = end_index
@@ -531,9 +531,9 @@ class IdeficsCausalLMBatch(Batch):
                 end_index = start_index + len(batch)
                 # We slice the past values to remove the padding from previous batches
                 past_seq_len = batch.max_input_length - 1
-                padded_past_values[
-                    start_index:end_index, :, -past_seq_len:, :
-                ] = past_values[:, :, -past_seq_len:, :]
+                padded_past_values[start_index:end_index, :, -past_seq_len:, :] = (
+                    past_values[:, :, -past_seq_len:, :]
+                )
                 del past_values
 
                 # Update values
@@ -609,9 +609,11 @@ class IdeficsCausalLM(Model):
             model_id,
             revision=revision,
             torch_dtype=dtype,
-            device_map="auto"
-            if torch.cuda.is_available() and torch.cuda.device_count() > 1
-            else None,
+            device_map=(
+                "auto"
+                if torch.cuda.is_available() and torch.cuda.device_count() > 1
+                else None
+            ),
             load_in_8bit=quantize == "bitsandbytes",
             trust_remote_code=trust_remote_code,
         )
@@ -664,30 +666,39 @@ class IdeficsCausalLM(Model):
         if self.has_position_ids:
             kwargs["position_ids"] = position_ids
 
-        outputs = self.model.forward(**kwargs)
-        return outputs.logits, outputs.past_key_values, outputs.image_hidden_states
+        outputs, speculative_logits = self.model.forward(**kwargs)
+        return (
+            outputs.logits,
+            speculative_logits,
+            outputs.past_key_values,
+            outputs.image_hidden_states,
+        )
 
     @tracer.start_as_current_span("generate_token")
     def generate_token(
         self, batch: IdeficsCausalLMBatch
-    ) -> Tuple[List[Generation], Optional[IdeficsCausalLMBatch]]:
+    ) -> Tuple[List[Generation], Optional[IdeficsCausalLMBatch], Tuple[int, int]]:
+        start = time.time_ns()
         # slice the attention mask to the correct shape
         attention_mask = batch.attention_mask[:, : -batch.padding_right_offset]
-        if batch.input_ids.size(1) == 1:
-            # THIS is a hack: when calling idefics.generate, the first time, we need the whole image_attention_mask (size bs x max_seq_len x max_num_images),
-            # but the subsequent times, we only need the last attention mask along the `max_seq_len` dimension
-            # this is due to the nature IDEFICS: it's an encoder decoder, and so when decoding, only the currently generated
-            # token need to attend to the encoder hidden states (i.e. the vision encoder)
-            # Also see seq2seq_lm.Seq2SeqLM.generate_token which has roughly the same logic
-            image_attention_mask = batch.image_attention_mask[
-                :, -(batch.padding_right_offset + 1)
-            ].unsqueeze(1)
+        if batch.image_attention_mask is None:
+            image_attention_mask = None
         else:
-            image_attention_mask = batch.image_attention_mask[
-                :, : -batch.padding_right_offset
-            ]
+            if batch.input_ids.size(1) == 1:
+                # THIS is a hack: when calling idefics.generate, the first time, we need the whole image_attention_mask (size bs x max_seq_len x max_num_images),
+                # but the subsequent times, we only need the last attention mask along the `max_seq_len` dimension
+                # this is due to the nature IDEFICS: it's an encoder decoder, and so when decoding, only the currently generated
+                # token need to attend to the encoder hidden states (i.e. the vision encoder)
+                # Also see seq2seq_lm.Seq2SeqLM.generate_token which has roughly the same logic
+                image_attention_mask = batch.image_attention_mask[
+                    :, -(batch.padding_right_offset + 1)
+                ].unsqueeze(1)
+            else:
+                image_attention_mask = batch.image_attention_mask[
+                    :, : -batch.padding_right_offset
+                ]
 
-        logits, past, image_hidden_states = self.forward(
+        logits, speculative_logits, past, image_hidden_states = self.forward(
             input_ids=batch.input_ids,
             attention_mask=attention_mask,
             position_ids=batch.position_ids,
@@ -699,6 +710,8 @@ class IdeficsCausalLM(Model):
         # Hardcoded remove image tokens
         logits[:, 32000:32001] = torch.finfo(logits.dtype).min
 
+        start_decode = time.time_ns()
+
         # Results
         generations: List[Generation] = []
         stopped = True
@@ -791,8 +804,11 @@ class IdeficsCausalLM(Model):
                         clean_up_tokenization_spaces=False,
                         skip_special_tokens=False,
                     )
-                    prefill_tokens = PrefillTokens(
-                        prefill_token_ids, prefill_logprobs, prefill_texts
+                    prefill_tokens = Tokens(
+                        prefill_token_ids,
+                        prefill_logprobs,
+                        prefill_texts,
+                        is_special=[],
                     )
                 else:
                     prefill_tokens = None
@@ -802,10 +818,12 @@ class IdeficsCausalLM(Model):
                 generation = Generation(
                     request.id,
                     prefill_tokens,
-                    next_token_id_squeezed,
-                    next_token_logprob,
-                    next_token_text,
-                    next_token_id_squeezed.item() in self.all_special_ids,
+                    Tokens(
+                        [next_token_id_squeezed],
+                        [next_token_logprob],
+                        [next_token_text],
+                        [next_token_id_squeezed.item() in self.all_special_ids],
+                    ),
                     generated_text,
                     top_tokens,
                 )
@@ -813,6 +831,9 @@ class IdeficsCausalLM(Model):
                 generations.append(generation)
 
             # Update values
+            batch.next_token_choosers[i] = batch.next_token_choosers[i].advance_grammar(
+                next_token_id_squeezed.item()
+            )
             batch.input_ids[i, 0] = next_token_id
             batch.all_input_ids[i] = all_input_ids
             batch.input_lengths[i] = new_input_length
@@ -822,16 +843,20 @@ class IdeficsCausalLM(Model):
 
         # We finished all generations in the batch; there is no next batch
         if stopped:
-            return generations, None
+            forward_ns = start_decode - start
+            decode_ns = time.time_ns() - start_decode
+            return generations, None, (forward_ns, decode_ns)
 
         # Slice unused values from prefill
         batch.input_ids = batch.input_ids[:, :1]
 
         # Update attention_mask as we added a new token to input_ids
         batch.attention_mask[:, -batch.padding_right_offset] = 1
-        batch.image_attention_mask[
-            :, -batch.padding_right_offset, :
-        ] = batch.image_attention_mask[:, -(batch.padding_right_offset + 1), :]
+        # The image mask is None when the batch carries no pixel values
+        if batch.image_attention_mask is not None:
+            batch.image_attention_mask[:, -batch.padding_right_offset, :] = (
+                batch.image_attention_mask[:, -(batch.padding_right_offset + 1), :]
+            )
         # Decrease right offset
         batch.padding_right_offset -= 1
 
@@ -842,4 +867,6 @@ class IdeficsCausalLM(Model):
         batch.past_key_values = past
         batch.image_hidden_states = image_hidden_states
 
-        return generations, batch
+        forward_ns = start_decode - start
+        decode_ns = time.time_ns() - start_decode
+        return generations, batch, (forward_ns, decode_ns)
diff --git a/server/text_generation_server/models/llava_next.py b/server/text_generation_server/models/llava_next.py
new file mode 100644
index 00000000..0ae1b46d
--- /dev/null
+++ b/server/text_generation_server/models/llava_next.py
@@ -0,0 +1,42 @@
+import torch
+
+from typing import Optional
+
+from transformers import (
+    AutoProcessor,
+)
+from text_generation_server.models.custom_modeling.llava_next import (
+    LlavaNextForConditionalGeneration,
+)
+
+from text_generation_server.models.vlm_causal_lm import VlmCausalLM
+
+
+class LlavaNext(VlmCausalLM):
+    """`VlmCausalLM` subclass built on `LlavaNextForConditionalGeneration`."""
+
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        # The processor is loaded from the same model id / revision and is
+        # attached to the instance before the parent constructor runs.
+        processor = AutoProcessor.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+        self.processor = processor
+        # All remaining arguments are forwarded unchanged to `VlmCausalLM`.
+        super().__init__(
+            model_cls=LlavaNextForConditionalGeneration,
+            model_id=model_id,
+            revision=revision,
+            quantize=quantize,
+            use_medusa=use_medusa,
+            dtype=dtype,
+            trust_remote_code=trust_remote_code,
+        )
diff --git a/server/text_generation_server/models/mamba.py b/server/text_generation_server/models/mamba.py
new file mode 100644
index 00000000..07a81491
--- /dev/null
+++ b/server/text_generation_server/models/mamba.py
@@ -0,0 +1,777 @@
+import torch
+import torch.distributed
+from transformers import AutoTokenizer, PreTrainedTokenizerBase
+from typing import Optional
+import os
+from text_generation_server.models.custom_modeling.mamba_modeling import (
+    MambaConfig,
+)
+from loguru import logger
+from text_generation_server.pb import generate_pb2
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+)
+from text_generation_server.models.globals import CUDA_GRAPHS, MEM_POOL
+import time
+from text_generation_server.models.custom_modeling.mamba_modeling import (
+    MambaModel,
+    InferenceParams,
+)
+from text_generation_server.models import Model
+from typing import Any, List, Optional, Tuple, Type, Dict
+from text_generation_server.models.types import (
+    Batch,
+    Tokens,
+    Generation,
+    GeneratedText,
+)
+from text_generation_server.utils.tokens import batch_top_tokens, Sampling
+from dataclasses import dataclass
+from text_generation_server.utils import NextTokenChooser, StoppingCriteria, Sampling
+
+
+def new_inference_params(
+    n_blocks: int,
+    batch_size: int,
+    d_inner: int,
+    d_conv: int,
+    d_state: int,
+    seqlen_offset: int,
+    dtype: torch.dtype,
+    device: torch.device,
+):
+    """Build an `InferenceParams` whose state tensors are all zeros.
+
+    Args:
+        n_blocks: size of the first dimension of both state tensors.
+        batch_size: size of the second dimension; also used as `max_batch_size`.
+        d_inner: size of the third dimension of both state tensors.
+        d_conv: size of the last dimension of `conv_states`.
+        d_state: size of the last dimension of `ssm_states`.
+        seqlen_offset: forwarded unchanged to `InferenceParams`.
+        dtype: dtype of the allocated tensors.
+        device: device the tensors are allocated on.
+
+    Returns:
+        An `InferenceParams` holding the two zero-filled tensors, with
+        `max_seqlen` initialised to 0.
+    """
+    # Both states share their first three dimensions and allocation options
+    leading_dims = (n_blocks, batch_size, d_inner)
+    zeros_kwargs = {"device": device, "dtype": dtype}
+    conv_states = torch.zeros(leading_dims + (d_conv,), **zeros_kwargs)
+    ssm_states = torch.zeros(leading_dims + (d_state,), **zeros_kwargs)
+
+    return InferenceParams(
+        max_seqlen=0,
+        max_batch_size=batch_size,
+        seqlen_offset=seqlen_offset,
+        conv_states=conv_states,
+        ssm_states=ssm_states,
+    )
+
+
+@dataclass
+class MambaBatch(Batch):
+    batch_id: int
+    requests: List[generate_pb2.Request]
+    requests_idx_mapping: Dict[int, int]
+
+    # Decoder values
+    input_ids: torch.Tensor
+
+    # All tokens
+    all_input_ids: List[torch.Tensor]
+
+    # Lengths of all generations present in the batch
+    input_lengths: List[int]
+    prefix_offsets: List[int]
+    read_offsets: List[int]
+
+    # Generation helpers
+    next_token_choosers: List[NextTokenChooser]
+    stopping_criterias: List[StoppingCriteria]
+    top_n_tokens: List[int]
+    top_n_tokens_tensor: torch.Tensor
+
+    # Metadata used for padding
+    max_input_length: int
+    padding_right_offset: int
+
+    # Maximum number of tokens this batch will grow to
+    max_tokens: int
+
+    # Past metadata
+    keys_head_dim_last: bool = True
+
+    # Inference params
+    inference_params: Optional[Dict[str, Any]] = None
+
+    def to_pb(self) -> generate_pb2.CachedBatch:
+        return generate_pb2.CachedBatch(
+            id=self.batch_id,
+            request_ids=[r.id for r in self.requests],
+            size=len(self),
+            max_tokens=self.max_tokens,
+        )
+
+    @classmethod
+    def from_pb(
+        cls,
+        pb: generate_pb2.Batch,
+        tokenizer: PreTrainedTokenizerBase,
+        dtype: torch.dtype,
+        device: torch.device,
+    ) -> "MambaBatch":
+        """Tokenize the requests of `pb` into a new batch (`dtype` is not used)."""
+        inputs = []
+        next_token_choosers = []
+        stopping_criterias = []
+        top_n_tokens = []
+        requests_idx_mapping = {}
+
+        # Parse batch
+        max_truncation = 0
+        padding_right_offset = 0
+        max_decode_tokens = 0
+        for i, r in enumerate(pb.requests):
+            requests_idx_mapping[r.id] = i
+            inputs.append(r.inputs)
+            next_token_choosers.append(
+                NextTokenChooser.from_pb(r.parameters, device, tokenizer)
+            )
+            stopping_criteria = StoppingCriteria.from_pb(
+                r.stopping_parameters, tokenizer
+            )
+            stopping_criterias.append(stopping_criteria)
+            top_n_tokens.append(r.top_n_tokens)
+            max_truncation = max(max_truncation, r.truncate)
+            max_decode_tokens += stopping_criteria.max_new_tokens
+            padding_right_offset = max(
+                padding_right_offset, stopping_criteria.max_new_tokens
+            )
+
+        tokenized_inputs = tokenizer(
+            inputs,
+            return_tensors="pt",
+            padding=True,
+            return_token_type_ids=False,
+            truncation=True,
+            max_length=max_truncation,
+        ).to(device)
+        # Inputs are padded to one common length, so every request starts from
+        # the same prefix/read offsets
+        input_len = tokenized_inputs["input_ids"].shape[1]
+        prefix_offsets = [input_len - 5] * len(pb.requests)
+        read_offsets = [input_len] * len(pb.requests)
+
+        input_lengths = tokenized_inputs["attention_mask"].sum(1)
+        # Plain int, so that `max_tokens` below is an int and not a 0-d tensor
+        max_input_length = input_lengths.max().item()
+        input_ids = tokenized_inputs["input_ids"]
+        all_input_ids = input_ids.T.split(1, dim=1)
+        top_n_tokens_tensor = torch.tensor(
+            top_n_tokens, device=device, dtype=torch.int64
+        )
+        max_tokens = len(inputs) * (max_input_length + max_decode_tokens)
+        return cls(
+            batch_id=pb.id,
+            requests=pb.requests,
+            requests_idx_mapping=requests_idx_mapping,
+            input_ids=input_ids,
+            all_input_ids=list(all_input_ids),
+            input_lengths=input_lengths.tolist(),
+            prefix_offsets=prefix_offsets,
+            read_offsets=read_offsets,
+            next_token_choosers=next_token_choosers,
+            stopping_criterias=stopping_criterias,
+            top_n_tokens=top_n_tokens,
+            top_n_tokens_tensor=top_n_tokens_tensor,
+            max_input_length=max_input_length,
+            padding_right_offset=padding_right_offset,
+            max_tokens=max_tokens,
+        )
+
+    def filter(self, request_ids: List[int]) -> Optional["MambaBatch"]:
+        """Keep only `request_ids` (in that order), updating this batch in place."""
+        if len(request_ids) == 0:
+            raise ValueError("Batch must have at least one request")
+        if len(request_ids) == len(self):
+            return self
+
+        # Positions, in the current batch, of the requests that are kept
+        indices = []
+
+        # New values after filtering
+        requests_idx_mapping = {}
+        requests = []
+        input_lengths = []
+        prefix_offsets = []
+        read_offsets = []
+        all_input_ids = []
+        max_input_length = 0
+
+        next_token_choosers = []
+        stopping_criterias = []
+        top_n_tokens = []
+
+        total_remaining_decode_tokens = 0
+        new_padding_right_offset = 0
+
+        for new_idx, request_id in enumerate(request_ids):
+            old_idx = self.requests_idx_mapping[request_id]
+            requests_idx_mapping[request_id] = new_idx
+            indices.append(old_idx)
+
+            requests.append(self.requests[old_idx])
+            prefix_offsets.append(self.prefix_offsets[old_idx])
+            read_offsets.append(self.read_offsets[old_idx])
+            all_input_ids.append(self.all_input_ids[old_idx])
+
+            request_input_length = self.input_lengths[old_idx]
+            input_lengths.append(request_input_length)
+            max_input_length = max(max_input_length, request_input_length)
+
+            next_token_choosers.append(self.next_token_choosers[old_idx])
+            stopping_criteria = self.stopping_criterias[old_idx]
+            stopping_criterias.append(stopping_criteria)
+            top_n_tokens.append(self.top_n_tokens[old_idx])
+            remaining_decode_tokens = (
+                stopping_criteria.max_new_tokens - stopping_criteria.current_tokens
+            )
+            total_remaining_decode_tokens += remaining_decode_tokens
+            new_padding_right_offset = max(
+                new_padding_right_offset, remaining_decode_tokens
+            )
+
+        # Apply indices to input_ids, attention mask, past key values and other items that need to be cached
+        input_ids = self.input_ids[indices]
+
+        top_n_tokens_tensor = self.top_n_tokens_tensor[indices]
+        max_tokens = len(request_ids) * max_input_length + total_remaining_decode_tokens
+
+        self.requests = requests
+        self.requests_idx_mapping = requests_idx_mapping
+        self.input_ids = input_ids
+        self.all_input_ids = all_input_ids
+        self.input_lengths = input_lengths
+        self.prefix_offsets = prefix_offsets
+        self.read_offsets = read_offsets
+        self.next_token_choosers = next_token_choosers
+        self.stopping_criterias = stopping_criterias
+        self.top_n_tokens = top_n_tokens
+        self.top_n_tokens_tensor = top_n_tokens_tensor
+        self.max_input_length = max_input_length
+        self.padding_right_offset = new_padding_right_offset
+        self.max_tokens = max_tokens
+
+        # TODO
+        # Kept it simple by just updating the state, maybe updating the other CPU values is necessary.
+        self.inference_params.conv_states = self.inference_params.conv_states[
+            :, indices
+        ]
+        self.inference_params.ssm_states = self.inference_params.ssm_states[:, indices]
+        return self
+
+    @classmethod
+    def concatenate(cls, batches: List["MambaBatch"]) -> "MambaBatch":
+        """Merge several batches (each with a non-zero seqlen_offset) into one new batch.
+
+        Per-request lists are appended in order; conv/ssm states are copied into new buffers.
+        """
+        # Used for padding
+        total_batch_size = 0
+        max_input_length = 0
+        padding_right_offset = 0
+        for batch in batches:
+            total_batch_size += len(batch)
+            max_input_length = max(max_input_length, batch.max_input_length)
+            padding_right_offset = max(padding_right_offset, batch.padding_right_offset)
+
+        # Batch attributes
+        requests = []
+        requests_idx_mapping = {}
+        input_lengths = []
+        prefix_offsets = []
+        read_offsets = []
+        all_input_ids = []
+        next_token_choosers = []
+        stopping_criterias = []
+        top_n_tokens = []
+        max_tokens = 0
+        seqlen_offset = 0
+
+        (n_blocks, _, d_inner, d_conv) = batches[0].inference_params.conv_states.shape
+        (_, _, _, d_state) = batches[0].inference_params.ssm_states.shape
+        dtype = batches[0].inference_params.conv_states.dtype
+        device = batches[0].inference_params.conv_states.device
+        inference_params = new_inference_params(
+            n_blocks=n_blocks,
+            batch_size=total_batch_size,
+            d_state=d_state,
+            d_conv=d_conv,
+            d_inner=d_inner,
+            seqlen_offset=seqlen_offset,
+            device=device,
+            dtype=dtype,
+        )
+
+        # Batch tensors
+        input_ids = None
+        top_n_tokens_tensor = None
+
+        # Used for slicing correctly inside the tensors
+        # Equivalent to a cumsum on batch sizes
+        start_index = 0
+        for batch in batches:
+            requests.extend(batch.requests)
+            input_lengths.extend(batch.input_lengths)
+            prefix_offsets.extend(batch.prefix_offsets)
+            read_offsets.extend(batch.read_offsets)
+            all_input_ids.extend(batch.all_input_ids)
+            next_token_choosers.extend(batch.next_token_choosers)
+            stopping_criterias.extend(batch.stopping_criterias)
+            top_n_tokens.extend(batch.top_n_tokens)
+
+            # Offset each mapping by the cumulative batch size, into a fresh dict (inputs stay unmutated)
+            for k, v in batch.requests_idx_mapping.items():
+                requests_idx_mapping[k] = v + start_index
+
+            # Slicing end index for this batch
+            end_index = start_index + len(batch)
+
+            # Create empty tensor
+            # input_ids is always of shape [batch_size, 1]
+            # We do not need to pad it
+            if input_ids is None:
+                input_ids = batch.input_ids.new_empty((total_batch_size, 1))
+            # Copy to correct indices
+            input_ids[start_index:end_index] = batch.input_ids
+
+            if top_n_tokens_tensor is None:
+                top_n_tokens_tensor = batches[0].top_n_tokens_tensor.new_zeros(
+                    total_batch_size,
+                )
+            top_n_tokens_tensor[start_index:end_index] = batch.top_n_tokens_tensor
+
+            # Add eventual padding tokens that were added while concatenating
+            max_tokens += batch.max_tokens + (
+                max_input_length - batch.max_input_length
+            ) * len(batch)
+
+            inference_params.max_seqlen = max(
+                inference_params.max_seqlen, batch.inference_params.max_seqlen
+            )
+            assert batch.inference_params.seqlen_offset != 0, "Invalid seqlen offset"
+            inference_params.seqlen_offset = max(
+                inference_params.seqlen_offset, batch.inference_params.seqlen_offset
+            )
+
+            inference_params.conv_states[:, start_index:end_index] = (
+                batch.inference_params.conv_states
+            )
+            inference_params.ssm_states[:, start_index:end_index] = (
+                batch.inference_params.ssm_states
+            )
+
+            start_index = end_index
+
+        return cls(
+            batch_id=batches[0].batch_id,
+            requests=requests,
+            requests_idx_mapping=requests_idx_mapping,
+            input_ids=input_ids,
+            all_input_ids=all_input_ids,
+            input_lengths=input_lengths,
+            prefix_offsets=prefix_offsets,
+            read_offsets=read_offsets,
+            next_token_choosers=next_token_choosers,
+            stopping_criterias=stopping_criterias,
+            top_n_tokens=top_n_tokens,
+            top_n_tokens_tensor=top_n_tokens_tensor,
+            max_input_length=max_input_length,
+            padding_right_offset=padding_right_offset,
+            keys_head_dim_last=batches[0].keys_head_dim_last,
+            max_tokens=max_tokens,
+            inference_params=inference_params,
+        )
+
+    def __len__(self) -> int:
+        return len(self.requests)  # batch size is the number of requests held
+
+
+class Mamba(Model):
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        """Load Mamba weights for `model_id` on one device, with the GPT-NeoX-20B tokenizer."""
+        self.process_group, _rank, world_size = initialize_torch_distributed()
+        if world_size > 1:
+            raise RuntimeError("Mamba does not support Tensor Parallelism (TP)")
+        self.cuda_graphs = {}  # batch size -> graph entry, filled by cuda_graph_warmup
+        if torch.cuda.is_available():
+            device = torch.device("cuda")
+            # Bf16 is important. In f16 accumulations in the matmul are causing
+            # differences while the server is under load.
+            # This is detectable by the integration load test
+            dtype = torch.bfloat16 if dtype is None else dtype
+        else:
+            if quantize:
+                raise ValueError("quantization is not available on CPU")
+            device = torch.device("cpu")
+            dtype = torch.float32 if dtype is None else dtype
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            "EleutherAI/gpt-neox-20b",
+            # `revision` pins `model_id`; it must not be forwarded to this separate tokenizer repo
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+        )
+        config = MambaConfig.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+
+        tokenizer.bos_token_id = config.bos_token_id
+        tokenizer.eos_token_id = config.eos_token_id
+        tokenizer.pad_token = tokenizer.eos_token
+
+        config.quantize = quantize
+        config.use_medusa = use_medusa
+        torch.distributed.barrier(group=self.process_group)
+        filenames = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(filenames, device, dtype, process_group=self.process_group)
+        model = MambaModel(config, weights)
+        torch.distributed.barrier(group=self.process_group)
+        super(Mamba, self).__init__(
+            model=model,
+            tokenizer=tokenizer,
+            requires_padding=True,
+            dtype=dtype,
+            device=device,
+        )
+
+    @property
+    def batch_type(self) -> Type[MambaBatch]:
+        return MambaBatch  # concrete batch class paired with this model
+
+    def warmup(self, batch) -> Optional[int]:
+        # Captures decode CUDA graphs only; `batch` is not used and None is always returned.
+        if CUDA_GRAPHS and (self.speculate is None or self.speculate == 0):
+            try:
+                logger.info(f"Cuda Graphs are enabled for sizes {CUDA_GRAPHS}")
+                # Warmup cuda graphs
+                for bs in CUDA_GRAPHS:
+                    self.cuda_graph_warmup(bs)
+            except Exception:
+                # Best effort: a failed capture is logged, not raised
+                logger.exception("Decode cuda graph warmup failed")
+
+        return None
+
+    def cuda_graph_warmup(self, batch_size: int):
+        """Capture a single-token forward pass as a CUDA graph for `batch_size`.
+
+        The graph and its static input/output buffers are cached in
+        `self.cuda_graphs[batch_size]`.
+        """
+        config = self.model.config
+        static_input_ids = torch.zeros(
+            (batch_size, 1), dtype=torch.int64, device=self.device
+        )
+        static_params = new_inference_params(
+            n_blocks=len(self.model.blocks),
+            batch_size=batch_size,
+            d_state=config.d_state,
+            d_conv=config.d_conv,
+            # Inner takes the expand multiplication
+            d_inner=config.d_inner,
+            # Must be non-zero to go through the update mechanism with the state
+            seqlen_offset=1,
+            device=self.device,
+            dtype=self.dtype,
+        )
+
+        captured = torch.cuda.CUDAGraph()
+
+        # Run once eagerly, outside of capture, to warm up
+        torch.cuda.synchronize()
+        self.model.forward(input_ids=static_input_ids, inference_params=static_params)
+        torch.cuda.synchronize()
+
+        with torch.cuda.graph(captured, pool=MEM_POOL):
+            logits, speculative_logits = self.model.forward(
+                input_ids=static_input_ids, inference_params=static_params
+            )
+        torch.cuda.synchronize()
+        self.cuda_graphs[batch_size] = {
+            "input_ids": static_input_ids,
+            "inference_params": static_params,
+            "graph": captured,
+            "logits": logits,
+            "speculative_logits": speculative_logits,
+        }
+
+    def forward(
+        self, input_ids: torch.Tensor, inference_params: Any
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Run one forward step of the model.
+
+        Decode steps replay a captured CUDA graph when one exists for the padded
+        batch size; prefill calls and uncaptured sizes run the model eagerly.
+        """
+        bs = input_ids.shape[0]
+        # Round the batch size up to the bucket used for graph lookup:
+        # sizes up to 2 are kept as is, 3 -> 4, 4..8 -> 8, then the next multiple of 8.
+        if bs <= 2:
+            padded_bs = bs
+        elif bs == 3:
+            padded_bs = 4
+        else:
+            padded_bs = (bs + 7) // 8 * 8
+
+        graph_entry = self.cuda_graphs.get(padded_bs)
+        # Without inference params, or at offset 0, this is a prefill call
+        decoding = (
+            inference_params is not None and inference_params.seqlen_offset != 0
+        )
+
+        # Eager path: prefill, or no graph captured for this size
+        if graph_entry is None or not decoding:
+            return self.model(input_ids, inference_params=inference_params)
+
+        static_params = graph_entry["inference_params"]
+
+        # Stage the live inputs into the graph's static (possibly padded) buffers
+        graph_entry["input_ids"][:bs] = input_ids
+        static_params.conv_states[:, :bs] = inference_params.conv_states
+        static_params.ssm_states[:, :bs] = inference_params.ssm_states
+
+        # Replay the graph
+        graph_entry["graph"].replay()
+
+        # Bring the updated states back into the caller's inference params
+        inference_params.conv_states.copy_(static_params.conv_states[:, :bs])
+        inference_params.ssm_states.copy_(static_params.ssm_states[:, :bs])
+
+        # Outputs live in static buffers too; slice them to the real batch size
+        logits = graph_entry["logits"][:bs]
+        static_spec = graph_entry["speculative_logits"]
+        speculative_logits = None if static_spec is None else static_spec[:bs]
+        return logits, speculative_logits
+
+    def generate_token(self, batch) -> Tuple[List[Any], Optional[Any], Tuple[int, int]]:
+        start = time.time_ns()  # start of the span reported as forward_ns
+        input_ids = (
+            batch.input_ids
+        )  # batch.past_input_ids if batch.past_input_ids is not None else batch.input_ids
+
+        batch_size, max_seqlen = input_ids.shape  # max_seqlen is not used below
+        # Inference params: allocated here when the batch does not carry any yet
+
+        if batch.inference_params is None:
+            # 0 is important here: forward() treats seqlen_offset == 0 as prefill
+            seqlen_offset = 0
+            n_blocks = len(self.model.blocks)
+            d_state = self.model.config.d_state
+            d_conv = self.model.config.d_conv
+            d_inner = self.model.config.d_inner
+            inference_params = new_inference_params(
+                n_blocks=n_blocks,
+                batch_size=batch_size,
+                d_state=d_state,
+                d_conv=d_conv,
+                d_inner=d_inner,
+                seqlen_offset=seqlen_offset,
+                device=self.device,
+                dtype=self.dtype,
+            )
+            batch.inference_params = inference_params
+
+        # Forward pass
+        logits, speculative_logits = self.forward(
+            input_ids, inference_params=batch.inference_params
+        )
+
+        # batch.inference_params = new_inference_params
+        # Results
+        generations: List[Generation] = []
+        stopped = True  # cleared as soon as one request has not met its stopping criteria
+
+        # Speculation is not active for causal: one accepted id per request
+        accepted_ids = torch.ones_like(batch.input_ids)[:, 0]
+        batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
+            batch.top_n_tokens,
+            batch.top_n_tokens_tensor,
+            torch.log_softmax(logits[:, -1], -1),
+            accepted_ids,
+        )
+
+        start_decode = time.time_ns()  # forward_ns ends and decode_ns starts here
+
+        # Zipped iterator
+        iterator = zip(
+            batch.requests,
+            batch.input_lengths,
+            batch.prefix_offsets,
+            batch.read_offsets,
+            logits,
+            batch.next_token_choosers,
+            batch.stopping_criterias,
+            batch.all_input_ids,
+            batch.top_n_tokens,
+            batch_top_token_ids,
+            batch_top_token_logprobs,
+        )
+
+        # For each member of the batch (note: `logits` below shadows the batch-level tensor)
+        for i, (
+            request,
+            input_length,
+            prefix_offset,
+            read_offset,
+            logits,
+            next_token_chooser,
+            stopping_criteria,
+            all_input_ids,
+            top_n_tokens,
+            top_token_ids,
+            top_token_logprobs,
+        ) in enumerate(iterator):
+            # Select next token from the logits of the last position
+            next_token_id, logprobs = next_token_chooser(
+                all_input_ids.view(1, -1), logits[-1:, :]
+            )
+
+            # Append next token to all tokens
+            all_input_ids = torch.cat([all_input_ids, next_token_id])
+            new_input_length = input_length + 1
+
+            # Generated token
+            next_token_logprob = logprobs[-1, next_token_id]
+            next_token_id_squeezed = next_token_id.squeeze()
+            next_token_text, prefix_offset, read_offset = self.decode_token(
+                all_input_ids[:, 0], prefix_offset, read_offset
+            )
+
+            # Evaluate stopping criteria
+            stop, reason = stopping_criteria(
+                next_token_id_squeezed,
+                next_token_text,
+            )
+
+            if not stop:
+                stopped = False
+
+            # Shard generations
+            # All generations will be appended in the rust sharded client
+            if i % self.world_size == self.rank:
+                if stop:
+                    # Decode generated tokens
+                    output_text, _, _ = self.decode_token(
+                        all_input_ids[:, 0],
+                        prefix_offset=len(all_input_ids)
+                        - stopping_criteria.current_tokens
+                        - 1,
+                        read_offset=len(all_input_ids)
+                        - stopping_criteria.current_tokens,
+                        skip_special_tokens=True,
+                    )
+                    # Get seed
+                    if isinstance(next_token_chooser.choice, Sampling):
+                        seed = next_token_chooser.choice.seed
+                    else:
+                        seed = None
+
+                    generated_text = GeneratedText(
+                        output_text, stopping_criteria.current_tokens, reason, seed
+                    )
+                else:
+                    generated_text = None
+
+                if stopping_criteria.current_tokens == 1 and request.prefill_logprobs:
+                    # Remove generated token to only have prefill and add nan for first prompt token
+                    prefill_logprobs = [float("nan")] + torch.log_softmax(
+                        logits, -1
+                    ).gather(1, all_input_ids[1:]).squeeze(1)[
+                        -new_input_length:-1
+                    ].tolist()
+                    prefill_token_ids = all_input_ids[-new_input_length:-1]
+                    prefill_texts = self.tokenizer.batch_decode(
+                        prefill_token_ids,
+                        clean_up_tokenization_spaces=False,
+                        skip_special_tokens=False,
+                    )
+                    prefill_tokens = Tokens(
+                        prefill_token_ids,
+                        prefill_logprobs,
+                        prefill_texts,
+                        is_special=[],  # NOTE(review): left empty while the other fields are filled — confirm intended
+                    )
+                else:
+                    prefill_tokens = None
+
+                if top_n_tokens > 0:
+                    toptoken_texts = self.tokenizer.batch_decode(
+                        top_token_ids,
+                        clean_up_tokenization_spaces=False,
+                        skip_special_tokens=False,
+                    )
+                    special_toptokens = [
+                        token_id in self.all_special_ids for token_id in top_token_ids
+                    ]
+                    top_tokens = Tokens(
+                        top_token_ids,
+                        top_token_logprobs,
+                        toptoken_texts,
+                        special_toptokens,
+                    )
+                else:
+                    top_tokens = None
+
+                generation = Generation(
+                    request.id,
+                    prefill_tokens,
+                    Tokens(
+                        [next_token_id_squeezed],
+                        [next_token_logprob],
+                        [next_token_text],
+                        [next_token_id_squeezed.item() in self.all_special_ids],
+                    ),
+                    generated_text,
+                    top_tokens,
+                )
+
+                generations.append(generation)
+
+                # Update values (NOTE(review): nested under the rank check above — confirm intended)
+                batch.next_token_choosers[i] = batch.next_token_choosers[
+                    i
+                ].advance_grammar(next_token_id_squeezed.item())
+                batch.input_ids[i, 0] = next_token_id
+                batch.all_input_ids[i] = all_input_ids
+                batch.input_lengths[i] = new_input_length
+                batch.prefix_offsets[i] = prefix_offset
+                batch.read_offsets[i] = read_offset
+                batch.max_input_length = max(batch.max_input_length, new_input_length)
+
+        # We finished all generations in the batch; there is no next batch
+        if stopped:
+            forward_ns = start_decode - start
+            decode_ns = time.time_ns() - start_decode
+            return generations, None, (forward_ns, decode_ns)
+
+        # Slice unused values from prefill: only the first column is kept as next input
+        batch.input_ids = batch.input_ids[:, :1]
+
+        forward_ns = start_decode - start
+        decode_ns = time.time_ns() - start_decode
+        return generations, batch, (forward_ns, decode_ns)
diff --git a/server/text_generation_server/models/model.py b/server/text_generation_server/models/model.py
index 73e1f1af..372c48c0 100644
--- a/server/text_generation_server/models/model.py
+++ b/server/text_generation_server/models/model.py
@@ -5,7 +5,8 @@ from abc import ABC, abstractmethod
 from typing import List, Optional, Tuple, Type, TypeVar
 from transformers import PreTrainedTokenizerBase
 
-from text_generation_server.models.types import Batch, GeneratedText
+from text_generation_server.models.types import Batch, Generation
+from text_generation_server.utils.speculate import get_speculate
 from text_generation_server.pb.generate_pb2 import InfoResponse
 
 B = TypeVar("B", bound=Batch)
@@ -22,6 +23,7 @@ class Model(ABC):
         rank: int = 0,
         world_size: int = 1,
         kwargs: dict = {},
+        speculate: Optional[int] = None,
     ):
         self.model = model
         self.tokenizer = tokenizer
@@ -32,7 +34,14 @@ class Model(ABC):
         self.rank = rank
         self.world_size = world_size
         self.kwargs = kwargs
-        self.has_position_ids = inspect.signature(model.forward).parameters.get("position_ids", None) is not None
+        if speculate is None:
+            speculate = get_speculate()
+        self.speculate = speculate
+
+        self.has_position_ids = (
+            inspect.signature(model.forward).parameters.get("position_ids", None)
+            is not None
+        )
 
         self.check_initialized()
 
@@ -42,6 +51,7 @@ class Model(ABC):
             requires_padding=self.requires_padding,
             dtype=str(self.dtype),
             device_type=self.device.type,
+            speculate=self.speculate,
         )
 
     @property
@@ -50,7 +60,9 @@ class Model(ABC):
         raise NotImplementedError
 
     @abstractmethod
-    def generate_token(self, batch: B) -> Tuple[List[GeneratedText], Optional[B]]:
+    def generate_token(
+        self, batch: B
+    ) -> Tuple[List[Generation], Optional[B], Tuple[int, int]]:
         raise NotImplementedError
 
     def warmup(self, batch: B, max_total_tokens: int):
diff --git a/server/text_generation_server/models/mpt.py b/server/text_generation_server/models/mpt.py
index 19de497c..6b3f29a6 100644
--- a/server/text_generation_server/models/mpt.py
+++ b/server/text_generation_server/models/mpt.py
@@ -43,6 +43,7 @@ class MPTSharded(CausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
@@ -75,13 +76,14 @@ class MPTSharded(CausalLM):
             config = json.load(f)
         config = PretrainedConfig(**config)
         config.quantize = quantize
+        config.use_medusa = use_medusa
 
         torch.distributed.barrier(group=self.process_group)
 
         filenames = weight_files(model_id, revision=revision, extension=".safetensors")
         weights = Weights(filenames, device, dtype, process_group=self.process_group)
         if config.quantize == "gptq":
-            weights._set_gptq_params(model_id)
+            weights._set_gptq_params(model_id, revision)
 
         config.quantize = quantize
         model = MPTForCausalLM(config, weights)
diff --git a/server/text_generation_server/models/opt.py b/server/text_generation_server/models/opt.py
index b2b87246..703e5b58 100644
--- a/server/text_generation_server/models/opt.py
+++ b/server/text_generation_server/models/opt.py
@@ -22,6 +22,7 @@ class OPTSharded(CausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
@@ -47,6 +48,7 @@ class OPTSharded(CausalLM):
             trust_remote_code=trust_remote_code,
         )
         config.quantize = quantize
+        config.use_medusa = use_medusa
         tokenizer.pad_token_id = config.pad_token_id
 
         torch.distributed.barrier(group=self.process_group)
@@ -55,7 +57,7 @@ class OPTSharded(CausalLM):
             filenames, device=device, dtype=dtype, process_group=self.process_group
         )
         if config.quantize == "gptq":
-            weights._set_gptq_params(model_id)
+            weights._set_gptq_params(model_id, revision)
 
         model = OPTForCausalLM(config, weights)
 
diff --git a/server/text_generation_server/models/phi.py b/server/text_generation_server/models/phi.py
new file mode 100644
index 00000000..cc4e2505
--- /dev/null
+++ b/server/text_generation_server/models/phi.py
@@ -0,0 +1,68 @@
+import torch
+import torch.distributed
+
+from transformers import AutoConfig, AutoTokenizer
+from typing import Optional, List, Tuple
+
+from text_generation_server.models import CausalLM
+from text_generation_server.models.custom_modeling.phi_modeling import (
+    PhiConfig,
+    PhiForCausalLM,
+)
+from text_generation_server.utils import (
+    initialize_torch_distributed,
+    weight_files,
+    Weights,
+)
+
+
+class Phi(CausalLM):
+    """Phi causal LM backed by the custom `PhiForCausalLM` modeling code."""
+    def __init__(
+        self,
+        model_id: str,
+        revision: Optional[str] = None,
+        quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
+        dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
+    ):
+        self.process_group, _rank, _world_size = initialize_torch_distributed()
+        has_cuda = torch.cuda.is_available()
+        if quantize and not has_cuda:
+            raise ValueError("quantization is not available on CPU")
+        device = torch.device("cuda" if has_cuda else "cpu")
+        if dtype is None:
+            dtype = torch.float16 if has_cuda else torch.float32
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_id,
+            revision=revision,
+            padding_side="left",
+            truncation_side="left",
+            trust_remote_code=trust_remote_code,
+        )
+        config = PhiConfig.from_pretrained(
+            model_id, revision=revision, trust_remote_code=trust_remote_code
+        )
+
+        # BOS/EOS ids are taken from the model config; padding reuses the EOS token
+        tokenizer.bos_token_id = config.bos_token_id
+        tokenizer.eos_token_id = config.eos_token_id
+        tokenizer.pad_token = tokenizer.eos_token
+
+        config.quantize = quantize
+        config.use_medusa = use_medusa
+        torch.distributed.barrier(group=self.process_group)
+        filenames = weight_files(model_id, revision=revision, extension=".safetensors")
+        weights = Weights(filenames, device, dtype, process_group=self.process_group)
+        model = PhiForCausalLM(config, weights)
+        torch.distributed.barrier(group=self.process_group)
+        # super(CausalLM, self) starts the MRO lookup after CausalLM, bypassing CausalLM.__init__
+        super(CausalLM, self).__init__(
+            model=model,
+            tokenizer=tokenizer,
+            requires_padding=True,
+            dtype=dtype,
+            device=device,
+        )
diff --git a/server/text_generation_server/models/rw.py b/server/text_generation_server/models/rw.py
index 802a4aa6..92c93542 100644
--- a/server/text_generation_server/models/rw.py
+++ b/server/text_generation_server/models/rw.py
@@ -12,9 +12,13 @@ class RW(CausalLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
+        if use_medusa:
+            raise RuntimeError("Medusa decoding is not enabled for AutoModel")
+
         if torch.cuda.is_available():
             device = torch.device("cuda")
             dtype = torch.float16 if dtype is None else dtype
@@ -36,9 +40,11 @@ class RW(CausalLM):
             model_id,
             revision=revision,
             torch_dtype=dtype,
-            device_map="auto"
-            if torch.cuda.is_available() and torch.cuda.device_count() > 1
-            else None,
+            device_map=(
+                "auto"
+                if torch.cuda.is_available() and torch.cuda.device_count() > 1
+                else None
+            ),
             load_in_8bit=quantize == "bitsandbytes",
             trust_remote_code=trust_remote_code,
         )
diff --git a/server/text_generation_server/models/santacoder.py b/server/text_generation_server/models/santacoder.py
index ee37a03a..a887555a 100644
--- a/server/text_generation_server/models/santacoder.py
+++ b/server/text_generation_server/models/santacoder.py
@@ -15,9 +15,17 @@ class SantaCoder(CausalLM):
         self,
         model_id: str,
         revision: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
+        trust_remote_code: bool = False,
     ):
-        super().__init__(model_id=model_id, revision=revision, dtype=dtype)
+        super().__init__(
+            model_id=model_id,
+            revision=revision,
+            use_medusa=use_medusa,
+            dtype=dtype,
+            trust_remote_code=trust_remote_code,
+        )
 
         self.tokenizer.add_special_tokens(
             {
diff --git a/server/text_generation_server/models/seq2seq_lm.py b/server/text_generation_server/models/seq2seq_lm.py
index d4d3cd19..e55a661c 100644
--- a/server/text_generation_server/models/seq2seq_lm.py
+++ b/server/text_generation_server/models/seq2seq_lm.py
@@ -1,18 +1,18 @@
-from text_generation_server.utils.tokens import batch_top_tokens
 import torch
+import time
 
 from dataclasses import dataclass
 from opentelemetry import trace
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, PreTrainedTokenizerBase
 from typing import Optional, Tuple, List, Type, Dict
 
+from text_generation_server.utils.tokens import batch_top_tokens
 from text_generation_server.models import Model
 from text_generation_server.models.types import (
     GeneratedText,
     Batch,
     Generation,
-    PrefillTokens,
-    TopTokens,
+    Tokens,
 )
 from text_generation_server.pb import generate_pb2
 from text_generation_server.utils import NextTokenChooser, StoppingCriteria, Sampling
@@ -96,7 +96,9 @@ class Seq2SeqLMBatch(Batch):
             inputs.append(r.inputs)
             requests_idx_mapping[r.id] = i
             decoder_input_lengths.append(1)
-            next_token_choosers.append(NextTokenChooser.from_pb(r.parameters, device))
+            next_token_choosers.append(
+                NextTokenChooser.from_pb(r.parameters, device, tokenizer)
+            )
             stopping_criteria = StoppingCriteria.from_pb(
                 r.stopping_parameters, tokenizer
             )
@@ -351,9 +353,9 @@ class Seq2SeqLMBatch(Batch):
                     (total_batch_size, max_input_length),
                 )
             # Copy to correct indices
-            attention_mask[
-                start_index:end_index, -batch.max_input_length :
-            ] = batch.attention_mask[:, -batch.max_input_length :]
+            attention_mask[start_index:end_index, -batch.max_input_length :] = (
+                batch.attention_mask[:, -batch.max_input_length :]
+            )
 
             # Create padded tensor
             if decoder_input_ids is None:
@@ -530,9 +532,13 @@ class Seq2SeqLM(Model):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
+        if use_medusa:
+            raise RuntimeError("Medusa decoding is not enabled for AutoModel")
+
         if torch.cuda.is_available():
             device = torch.device("cuda")
             dtype = torch.float16 if dtype is None else dtype
@@ -547,9 +553,11 @@ class Seq2SeqLM(Model):
             model_id,
             revision=revision,
             torch_dtype=dtype,
-            device_map="auto"
-            if torch.cuda.is_available() and torch.cuda.device_count() > 1
-            else None,
+            device_map=(
+                "auto"
+                if torch.cuda.is_available() and torch.cuda.device_count() > 1
+                else None
+            ),
             load_in_8bit=quantize == "bitsandbytes",
             trust_remote_code=trust_remote_code,
         )
@@ -592,6 +600,7 @@ class Seq2SeqLM(Model):
         past_key_values: Optional = None,
     ) -> Tuple[
         torch.Tensor,
+        Optional[torch.Tensor],
         torch.Tensor,
         List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]],
     ]:
@@ -605,8 +614,15 @@ class Seq2SeqLM(Model):
             past_key_values=past_key_values,
             use_cache=True,
         )
+        if isinstance(outputs, tuple):
+            # Our custom models
+            outputs, speculative_logits = outputs
+        else:
+            # Generic transformers models
+            speculative_logits = None
         return (
             outputs.logits,
+            speculative_logits,
             outputs.encoder_last_hidden_state,
             outputs.past_key_values,
         )
@@ -614,7 +630,8 @@ class Seq2SeqLM(Model):
     @tracer.start_as_current_span("generate_token")
     def generate_token(
         self, batch: Seq2SeqLMBatch
-    ) -> Tuple[List[Generation], Optional[Seq2SeqLMBatch]]:
+    ) -> Tuple[List[Generation], Optional[Seq2SeqLMBatch], Tuple[int, int]]:
+        start = time.time_ns()
         if batch.decoder_attention_mask is not None:
             # slice to the correct shape
             decoder_attention_mask = batch.decoder_attention_mask[
@@ -630,7 +647,7 @@ class Seq2SeqLM(Model):
         else:
             encoder_last_hidden_state = None
 
-        logits, encoder_last_hidden_state, past = self.forward(
+        logits, speculative_logits, encoder_last_hidden_state, past = self.forward(
             batch.input_ids,
             batch.attention_mask,
             batch.decoder_input_ids,
@@ -639,12 +656,17 @@ class Seq2SeqLM(Model):
             batch.past_key_values,
         )
 
+        # Speculation is not active for seq2seq
+        accepted_ids = torch.ones_like(batch.decoder_input_ids)[:, 0]
         batch_top_token_ids, batch_top_token_logprobs = batch_top_tokens(
             batch.top_n_tokens,
             batch.top_n_tokens_tensor,
             torch.log_softmax(logits[:, -1], -1),
+            accepted_ids,
         )
 
+        start_decode = time.time_ns()
+
         # Finished requests
         generations: List[Generation] = []
         stopped = True
@@ -733,39 +755,49 @@ class Seq2SeqLM(Model):
 
                 # Prefill
                 if stopping_criteria.current_tokens == 1 and request.prefill_logprobs:
-                    prefill_tokens = PrefillTokens(
+                    prefill_tokens = Tokens(
                         [self.tokenizer.bos_token_id],
                         [float("nan")],
                         [self.tokenizer.bos_token],
+                        [False],
                     )
                 else:
                     prefill_tokens = None
 
                 if top_n_tokens > 0:
-                    toptoken_texts = self.tokenizer.batch_decode(
-                        top_token_ids,
-                        clean_up_tokenization_spaces=False,
-                        skip_special_tokens=False,
-                    )
-                    special_toptokens = [
-                        token_id in self.all_special_ids for token_id in top_token_ids
-                    ]
-                    top_tokens = TopTokens(
-                        top_token_ids,
-                        top_token_logprobs,
-                        toptoken_texts,
-                        special_toptokens,
-                    )
+                    all_top_tokens = []
+                    for top_token_ids, top_token_logprobs in zip(
+                        top_token_ids, top_token_logprobs
+                    ):
+                        toptoken_texts = self.tokenizer.batch_decode(
+                            top_token_ids,
+                            clean_up_tokenization_spaces=False,
+                            skip_special_tokens=False,
+                        )
+                        special_toptokens = [
+                            token_id in self.all_special_ids
+                            for token_id in top_token_ids
+                        ]
+                        top_tokens = Tokens(
+                            top_token_ids,
+                            top_token_logprobs,
+                            toptoken_texts,
+                            special_toptokens,
+                        )
+                        all_top_tokens.append(top_tokens)
+                    top_tokens = all_top_tokens
                 else:
                     top_tokens = None
 
                 generation = Generation(
                     request.id,
                     prefill_tokens,
-                    next_token_id_squeezed,
-                    next_token_logprob,
-                    next_token_text,
-                    next_token_id_squeezed.item() in self.all_special_ids,
+                    Tokens(
+                        [next_token_id_squeezed],
+                        [next_token_logprob],
+                        [next_token_text],
+                        [next_token_id_squeezed.item() in self.all_special_ids],
+                    ),
                     generated_text,
                     top_tokens,
                 )
@@ -773,6 +805,9 @@ class Seq2SeqLM(Model):
                 generations.append(generation)
 
             # Update values
+            batch.next_token_choosers[i] = batch.next_token_choosers[i].advance_grammar(
+                next_token_id_squeezed.item()
+            )
             batch.decoder_input_ids[i] = next_token_id
             batch.all_decoder_input_ids[i] = all_decoder_input_ids
             batch.input_lengths[i] = input_length
@@ -786,7 +821,9 @@ class Seq2SeqLM(Model):
 
         # We finished all generations in the batch; there is no next batch
         if stopped:
-            return generations, None
+            forward_ns = start_decode - start
+            decode_ns = time.time_ns() - start_decode
+            return generations, None, (forward_ns, decode_ns)
 
         # We don't need input_ids after the prefill forward
         batch.input_ids = None
@@ -797,4 +834,6 @@ class Seq2SeqLM(Model):
             batch.decoder_attention_mask[:, -batch.padding_right_offset] = 1
         batch.padding_right_offset -= 1
 
-        return generations, batch
+        forward_ns = start_decode - start
+        decode_ns = time.time_ns() - start_decode
+        return generations, batch, (forward_ns, decode_ns)
diff --git a/server/text_generation_server/models/t5.py b/server/text_generation_server/models/t5.py
index 161e69ba..3f3cb965 100644
--- a/server/text_generation_server/models/t5.py
+++ b/server/text_generation_server/models/t5.py
@@ -25,6 +25,7 @@ class T5Sharded(Seq2SeqLM):
         model_id: str,
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
+        use_medusa: Optional[str] = None,
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
@@ -42,6 +43,7 @@ class T5Sharded(Seq2SeqLM):
             trust_remote_code=trust_remote_code,
         )
         config.quantize = quantize
+        config.use_medusa = use_medusa
 
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
@@ -94,7 +96,7 @@ class T5Sharded(Seq2SeqLM):
         List[Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]],
     ]:
         # Model Forward
-        outputs = self.model.forward(
+        outputs, speculative_logits = self.model.forward(
             input_ids=input_ids,
             attention_mask=attention_mask,
             decoder_input_ids=decoder_input_ids,
@@ -106,6 +108,7 @@ class T5Sharded(Seq2SeqLM):
 
         return (
             outputs.logits,
+            speculative_logits,
             outputs.encoder_last_hidden_state,
             outputs.past_key_values,
         )
diff --git a/server/text_generation_server/models/types.py b/server/text_generation_server/models/types.py
index 0e27680d..339b733b 100644
--- a/server/text_generation_server/models/types.py
+++ b/server/text_generation_server/models/types.py
@@ -58,29 +58,14 @@ class GeneratedText:
 
 
 @dataclass
-class PrefillTokens:
-    token_ids: List[int]
-    logprobs: List[float]
-    texts: List[str]
-
-    def to_pb(self) -> generate_pb2.PrefillTokens:
-        return generate_pb2.PrefillTokens(
-            ids=self.token_ids, logprobs=self.logprobs, texts=self.texts
-        )
-
-    def __len__(self):
-        return len(self.token_ids)
-
-
-@dataclass
-class TopTokens:
+class Tokens:
     token_ids: List[int]
     logprobs: List[float]
     texts: List[str]
     is_special: List[bool]
 
-    def to_pb(self) -> generate_pb2.TopTokens:
-        return generate_pb2.TopTokens(
+    def to_pb(self) -> generate_pb2.Tokens:
+        return generate_pb2.Tokens(
             ids=self.token_ids,
             logprobs=self.logprobs,
             texts=self.texts,
@@ -94,27 +79,25 @@ class TopTokens:
 @dataclass
 class Generation:
     request_id: int
-    prefill_tokens: Optional[PrefillTokens]
-    token_id: int
-    token_logprob: float
-    token_text: str
-    token_is_special: bool
+    prefill_tokens: Optional[Tokens]
+    tokens: Tokens
     generated_text: Optional[GeneratedText]
     # Optional for now, since it's not yet supported for every model.
-    top_tokens: Optional[TopTokens]
+    top_tokens: Optional[List[Tokens]]
 
     def to_pb(self) -> generate_pb2.Generation:
         return generate_pb2.Generation(
             request_id=self.request_id,
-            prefill_tokens=self.prefill_tokens.to_pb()
-            if self.prefill_tokens is not None
-            else None,
-            token_id=self.token_id,
-            token_logprob=self.token_logprob,
-            token_text=self.token_text,
-            token_is_special=self.token_is_special,
-            generated_text=self.generated_text.to_pb()
-            if self.generated_text is not None
-            else None,
-            top_tokens=self.top_tokens.to_pb() if self.top_tokens is not None else None,
+            prefill_tokens=(
+                self.prefill_tokens.to_pb() if self.prefill_tokens is not None else None
+            ),
+            tokens=self.tokens.to_pb(),
+            generated_text=(
+                self.generated_text.to_pb() if self.generated_text is not None else None
+            ),
+            top_tokens=(
+                [top_tokens.to_pb() for top_tokens in self.top_tokens]
+                if self.top_tokens is not None
+                else None
+            ),
         )
diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py
new file mode 100644
index 00000000..16042fc9
--- /dev/null
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@@ -0,0 +1,329 @@
+import re
+import torch
+import math
+from PIL import Image
+from io import BytesIO
+import base64
+
+from opentelemetry import trace
+from typing import Optional, Tuple, List, Type, Dict
+
+from transformers import PreTrainedTokenizerBase
+from transformers.image_processing_utils import select_best_resolution
+from text_generation_server.pb import generate_pb2
+from text_generation_server.models.flash_mistral import (
+    BaseFlashMistral,
+    FlashMistralBatch,
+)
+from text_generation_server.models.cache_manager import (
+    get_cache_manager,
+)
+
+tracer = trace.get_tracer(__name__)
+
+IMAGES = re.compile(r"!\[[^\]]*\]\((.*?)\s*(\"(?:.*[^\"])\")?\s*\)")
+
+
+def split(string) -> List[Dict[str, str]]:
+    """Break `string` into ordered text / image chunks around every `IMAGES` match."""
+    chunks = []
+    pos = 0
+    for match in IMAGES.finditer(string):
+        begin = match.start()
+        # Text sitting between the previous match and this one, if any
+        if begin != pos:
+            chunks.append({"type": "text", "content": string[pos:begin]})
+        # Group 1 of the pattern is the part captured right after `](`
+        chunks.append({"type": "image", "content": match.group(1)})
+        pos = match.end()
+    if pos != len(string):
+        chunks.append({"type": "text", "content": string[pos:]})
+    return chunks
+
+
+def get_anyres_image_grid_shape(image_size, grid_pinpoints, patch_size):
+    """
+    Calculate the shape of the image patch grid after the preprocessing for images of any resolution.
+
+    Args:
+        image_size (`tuple`):
+            The size of the input image in the format (height, width).
+        grid_pinpoints (`List`):
+            A list containing possible resolutions. Each item in the list should be a tuple or list
+            of the form `(height, width)`.
+        patch_size (`int`):
+            The size of each image patch.
+
+    Returns:
+        tuple: The shape of the image patch grid in the format (height, width).
+    """
+    if not isinstance(grid_pinpoints, list):
+        raise ValueError("grid_pinpoints should be a list of tuples or lists")
+    # The selected resolution is unpacked as (height, width)
+    height, width = select_best_resolution(image_size, grid_pinpoints)
+    return height // patch_size, width // patch_size
+
+
+def get_number_of_features(height: int, width: int, config) -> int:
+    """Compute the feature count for a `height` x `width` image from the vision config."""
+    # All geometry comes from the config; example of a previously hardcoded CLIP value:
+    # image_grid_pinpoints = [[336, 672], [672, 336], [672, 672], [1008, 336], [336, 1008]]
+    image_grid_pinpoints = config.image_grid_pinpoints
+    image_size = config.vision_config.image_size
+    patch_size = config.vision_config.patch_size
+    # `image_size` must split into a whole number of patches
+    assert image_size % patch_size == 0
+    # Patches along one side of a single `image_size` tile
+    npatches = image_size // patch_size
+    # `image_size` is passed as the patch size, so the grid is counted in whole tiles
+    num_patch_height, num_patch_width = get_anyres_image_grid_shape(
+        [height, width],
+        image_grid_pinpoints,
+        image_size,
+    )
+    # Height of the image in patches once its width is scaled to `npatches`
+    height_of_patch = math.ceil(height / width * npatches)
+
+    unpadded_features = npatches * height_of_patch * num_patch_height * num_patch_width
+    # They are only added after width
+    newline_features = height_of_patch * num_patch_width
+    # The base patch covers the entire image
+    base_features = npatches**2
+    return unpadded_features + newline_features + base_features
+
+
+def load_data_uri(image_uri: str) -> Image.Image:
+    """Decode the base64 payload of a data URI and open it as a PIL image."""
+    # Everything after the last comma is treated as the base64 payload
+    payload = base64.b64decode(image_uri.split(",")[-1])
+    return Image.open(BytesIO(payload))
+
+
+# assert get_number_of_features(889, 1024) == 2634, f"{get_number_of_features(889, 1024)}"
+# assert get_number_of_features(640, 640) == 2928
+
+
+class VlmCausalLMBatch(FlashMistralBatch):
+    """Flash Mistral batch that additionally carries the image inputs of its requests."""
+    pixel_values: Optional[List[torch.Tensor]]
+    image_sizes: Optional[List[Tuple[int, int]]]
+
+    @classmethod
+    @tracer.start_as_current_span("concatenate")
+    def concatenate(cls, batches):
+        """Concatenate through the parent class, then reset the image inputs to None."""
+        batch = super(VlmCausalLMBatch, cls).concatenate(batches)
+        batch.pixel_values = None
+        batch.image_sizes = None
+        return batch
+
+    @tracer.start_as_current_span("filter")
+    def filter(self, request_ids: List[int]):
+        batch = super().filter(request_ids)
+        batch.pixel_values = None
+        batch.image_sizes = None
+        return batch
+
+    @classmethod
+    def batch_tokenized_inputs(cls, requests, tokenizer, processor, config):
+        """Return `(input_ids, image_inputs)`; each image becomes a run of `<image>`."""
+        batch_inputs = []
+        image_inputs = []
+        max_truncation = 0
+        for r in requests:
+            chunks = split(r.inputs)
+            full_text = ""
+            for chunk in chunks:
+                if chunk["type"] == "text":
+                    full_text += chunk["content"]
+                elif chunk["type"] == "image":
+                    image = chunk["content"]
+                    # URLs are resolved on the rust layer now (no `fetch_images` here), which
+                    # avoids making n queries per TP; only `data:` URIs are handled.
+                    if image.startswith("data:"):
+                        image = load_data_uri(image)
+                    else:
+                        raise RuntimeError(
+                            "Cannot process input image not starting with data:"
+                        )
+                    image_input = processor.image_processor(image, return_tensors="pt")
+                    height, width = image_input["image_sizes"][0]
+                    num_features = get_number_of_features(height, width, config)
+                    full_text += "<image>" * num_features
+                    image_inputs.append(image_input)
+                else:
+                    raise RuntimeError(f"Invalid chunk type {chunk['type']}")
+            batch_inputs.append(full_text)
+            max_truncation = max(max_truncation, r.truncate)
+        # One max_length (the largest `truncate` seen) is applied to the whole batch
+        batch_tokenized_inputs = tokenizer(
+            batch_inputs, truncation=True, max_length=max_truncation
+        )["input_ids"]
+        if image_inputs:
+            image_inputs = {
+                "pixel_values": torch.cat(
+                    [img["pixel_values"] for img in image_inputs], dim=0
+                ),
+                "image_sizes": torch.cat([img["image_sizes"] for img in image_inputs]),
+            }
+        else:
+            image_inputs = None
+        return batch_tokenized_inputs, image_inputs
+
+    @classmethod
+    def from_pb_processor(
+        cls,
+        pb: generate_pb2.Batch,
+        tokenizer: PreTrainedTokenizerBase,
+        processor,
+        config,
+        dtype: torch.dtype,
+        device: torch.device,
+    ) -> "VlmCausalLMBatch":
+        """Build the batch from `pb` and attach its image tensors, moved to `device`."""
+        batch_tokenized_inputs, image_inputs = cls.batch_tokenized_inputs(
+            pb.requests, tokenizer, processor, config
+        )
+        batch = cls.from_tokenized(pb, tokenizer, batch_tokenized_inputs, dtype, device)
+        if image_inputs is not None:
+            batch.pixel_values = image_inputs["pixel_values"].to(device=device)
+            batch.image_sizes = image_inputs["image_sizes"].to(device=device)
+        else:
+            batch.pixel_values = None
+            batch.image_sizes = None
+        return batch
+
+
+class VlmCausalLM(BaseFlashMistral):
+    """Flash Mistral based model whose forward also passes image inputs to the model."""
+    @property
+    def batch_type(self) -> Type[VlmCausalLMBatch]:
+        return VlmCausalLMBatch
+
+    def get_layer_config(self, model) -> Tuple[int, int, int]:
+        """Return (layer count, KV head count, head size) of the wrapped language model."""
+        return (
+            len(model.language_model.model.layers),
+            model.language_model.model.num_key_value_heads,
+            model.language_model.model.head_size,
+        )
+
+    def max_past(self) -> Optional[int]:
+        """Return the language model's `max_past`, or None when it has no such attribute."""
+        return getattr(self.model.language_model, "max_past", None)
+
+    def forward(
+        self, batch: VlmCausalLMBatch
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+        """Run the model on `batch` and return `(logits, speculative_logits)`."""
+        # Model Forward
+        if batch.speculative_ids is not None:
+            input_ids = batch.input_ids
+            position_ids = batch.position_ids
+            cu_seqlen_prefill = batch.cu_seqlen_prefill
+            kv_cache = get_cache_manager().kv_cache
+            block_tables = batch.block_tables_tensor
+            slots = batch.slots[batch.slot_indices]
+            input_lengths = batch.input_lengths_tensor
+            max_s = batch.max_seqlen
+            lm_head_indices = batch.prefill_head_indices
+            speculative_ids = batch.speculative_ids
+            B, speculative_length = speculative_ids.shape
+            new_length = speculative_length + 1
+            new_input_ids = torch.cat(
+                [input_ids.unsqueeze(-1), speculative_ids], dim=1
+            ).reshape(-1)
+            arange = torch.arange(new_length, device=position_ids.device).unsqueeze(0)
+            arange_int = arange.to(dtype=torch.int32)
+            new_position_ids = (
+                position_ids.unsqueeze(-1).expand(B, new_length) + arange
+            ).view(-1)
+            slots = (slots.unsqueeze(-1).expand(B, new_length) + arange_int).view(-1)
+            input_lengths = (
+                input_lengths.unsqueeze(-1).expand(B, new_length) + arange_int
+            ).view(-1)
+            # Copy the block tables for every one of the B * new_length rows
+            block_tables = (
+                block_tables.unsqueeze(1)
+                .expand(B, new_length, -1)
+                .reshape(B * new_length, -1)
+                .contiguous()
+            )
+            max_s = max_s + speculative_length
+
+            input_ids = new_input_ids
+            position_ids = new_position_ids
+        else:
+            input_ids = batch.input_ids
+            position_ids = batch.position_ids
+            cu_seqlen_prefill = batch.cu_seqlen_prefill
+            kv_cache = get_cache_manager().kv_cache
+            block_tables = batch.block_tables_tensor
+            slots = batch.slots[batch.slot_indices]
+            input_lengths = batch.input_lengths_tensor
+            max_s = batch.max_seqlen
+            lm_head_indices = batch.prefill_head_indices
+
+        if cu_seqlen_prefill is None and self.max_past() is not None:
+            # In decode, not prefill, we're actually overwriting the KV-cache
+            # in a circular buffer mode.
+            # This makes sure the max_s for the decode pass is correct.
+            max_s = min(self.max_past(), max_s)
+        # Round the batch size up to the size used as key in `self.cuda_graphs`
+        bs = input_ids.shape[0]
+        padded_bs = bs
+        if bs == 3:
+            padded_bs = 4
+        elif 3 < bs <= 8:
+            padded_bs = 8
+        elif bs > 8:
+            padded_bs = (bs + 7) // 8 * 8
+
+        # Try to find an associated cuda graph
+        cuda_graph = self.cuda_graphs.get(padded_bs, None)
+        if cu_seqlen_prefill is not None or cuda_graph is None:
+            logits, speculative_logits = self.model.forward(
+                input_ids=input_ids,
+                position_ids=position_ids,
+                cu_seqlen_prefill=cu_seqlen_prefill,
+                kv_cache=kv_cache,
+                block_tables=block_tables,
+                slots=slots,
+                input_lengths=input_lengths,
+                max_s=max_s,
+                prefill_cache_indices=batch.prefill_cache_indices,
+                lm_head_indices=lm_head_indices,
+                pixel_values=batch.pixel_values,
+                image_sizes=batch.image_sizes,
+            )
+            if batch.prefill_cache_indices is not None:
+                batch.prefill_cache_indices = None
+            if batch.pixel_values is not None:
+                batch.pixel_values = None
+            if batch.image_sizes is not None:
+                batch.image_sizes = None
+            return logits, speculative_logits
+
+        # Copy inputs to the static inputs of the cuda graph
+        # Static inputs are potentially padded
+        cuda_graph["input_ids"][: input_ids.shape[0]] = input_ids
+        cuda_graph["position_ids"][: position_ids.shape[0]] = position_ids
+        cuda_graph["block_tables"][
+            : block_tables.shape[0], : block_tables.shape[1]
+        ] = block_tables
+        cuda_graph["slots"].fill_(-1)
+        cuda_graph["slots"][: slots.shape[0]] = slots
+        cuda_graph["input_lengths"].zero_()
+        cuda_graph["input_lengths"][: input_lengths.shape[0]] = input_lengths
+
+        # Replay the graph
+        cuda_graph["graph"].replay()
+
+        # Slice output to the correct shape
+        speculative_logits = (
+            cuda_graph["speculative_logits"][:bs]
+            if cuda_graph["speculative_logits"] is not None
+            else None
+        )
+        logits = cuda_graph["logits"][:bs]
+        return logits, speculative_logits
diff --git a/server/text_generation_server/pb/.gitignore b/server/text_generation_server/pb/.gitignore
index 2621a190..5a68d631 100644
--- a/server/text_generation_server/pb/.gitignore
+++ b/server/text_generation_server/pb/.gitignore
@@ -1,3 +1,3 @@
 *.py
 *.pyi
-*.py-e
\ No newline at end of file
+*.py-e
diff --git a/server/text_generation_server/server.py b/server/text_generation_server/server.py
index 67358d3c..4a07733a 100644
--- a/server/text_generation_server/server.py
+++ b/server/text_generation_server/server.py
@@ -4,6 +4,7 @@ import asyncio
 import os
 import sys
 import torch
+import time
 
 from grpc import aio
 from loguru import logger
@@ -20,7 +21,12 @@ from text_generation_server.tracing import UDSOpenTelemetryAioServerInterceptor
 
 
 class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
-    def __init__(self, model: Model, cache: Cache, server_urls: List[str]):
+    def __init__(
+        self,
+        model: Model,
+        cache: Cache,
+        server_urls: List[str],
+    ):
         self.cache = cache
         self.model = model
         self.server_urls = server_urls
@@ -70,18 +76,23 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
         return generate_pb2.WarmupResponse()
 
     async def Prefill(self, request, context):
+        start = time.time_ns()
         batch = self.model.batch_type.from_pb(
             request.batch, self.model.tokenizer, self.model.dtype, self.model.device
         )
-        generations, next_batch = self.model.generate_token([batch])
+        generations, next_batch, timings = self.model.generate_token([batch])
         self.cache.set(next_batch)
 
         return generate_pb2.PrefillResponse(
             generations=[generation.to_pb() for generation in generations],
             batch=next_batch.to_pb() if next_batch else None,
+            forward_ns=timings[0],
+            decode_ns=timings[1],
+            total_ns=time.time_ns() - start,
         )
 
     async def Decode(self, request, context):
+        start = time.time_ns()
         if len(request.batches) == 0:
             raise ValueError("Must provide at least one batch")
 
@@ -95,21 +106,27 @@ class TextGenerationService(generate_pb2_grpc.TextGenerationServiceServicer):
         if len(batches) == 0:
             raise ValueError("All batches are empty")
 
-        generations, next_batch = self.model.generate_token(batches)
+        generations, next_batch, timings = self.model.generate_token(batches)
         self.cache.set(next_batch)
 
         return generate_pb2.DecodeResponse(
             generations=[generation.to_pb() for generation in generations],
             batch=next_batch.to_pb() if next_batch else None,
+            concat_ns=None, # TODO: measure concat time
+            forward_ns=timings[0],
+            decode_ns=timings[1],
+            total_ns=time.time_ns() - start,
         )
 
 
 def serve(
     model_id: str,
     revision: Optional[str],
-    dtype: Optional[str],
-    uds_path: Path,
     sharded: bool,
+    speculate: Optional[int],
+    dtype: Optional[str],
+    trust_remote_code: bool,
+    uds_path: Path,
 ):
     # Remove default handler
     logger.remove()
@@ -126,8 +143,10 @@ def serve(
     async def serve_inner(
         model_id: str,
         revision: Optional[str],
-        dtype: Optional[str] = None,
         sharded: bool = False,
+        speculate: Optional[int] = None,
+        dtype: Optional[str] = None,
+        trust_remote_code: bool = False,
     ):
         unix_socket_template = "unix://{}-{}"
         logger.info("Server:server_inner: sharded ={}".format(sharded))
@@ -151,7 +170,13 @@ def serve(
         if revision == "None":
             revision = None
         try:
-            model = get_model(model_id, revision=revision, dtype=data_type)
+            model = get_model(
+                model_id,
+                revision,
+                speculate,
+                data_type,
+                trust_remote_code
+            )
         except Exception:
             logger.exception("Error when initializing model")
             raise
@@ -181,13 +206,9 @@ def serve(
         except KeyboardInterrupt:
             logger.info("Signal received. Shutting down")
             await server.stop(0)
-        finally:
-            if hasattr(model,'finish_quantization_measurements'):
-                model.finish_quantization_measurements()
 
-    logger.info(
-        "Starting Server : model_id= {}, revision = {}  dtype = {}  sharded = {} ".format(
-            model_id, revision, dtype, sharded
+    asyncio.run(
+        serve_inner(
+            model_id, revision, sharded, speculate, dtype, trust_remote_code
         )
     )
-    asyncio.run(serve_inner(model_id, revision, dtype, sharded))
diff --git a/server/text_generation_server/tgi_service.py b/server/text_generation_server/tgi_service.py
index bf1bab40..f88c8c8b 100644
--- a/server/text_generation_server/tgi_service.py
+++ b/server/text_generation_server/tgi_service.py
@@ -9,12 +9,18 @@ import argparse
 def main(args):
     logger.info("TGIService: starting tgi service .... ")
     logger.info(
-        "TGIService: --model_id {}, --revision {}, --sharded {}, --dtype {}, --uds_path {} ".format(
-            args.model_id, args.revision, args.sharded, args.dtype, args.uds_path
+        "TGIService: --model_id {}, --revision {}, --sharded {}, --speculate {}, --dtype {}, --trust_remote_code {}, --uds_path {} ".format(
+            args.model_id, args.revision, args.sharded, args.speculate, args.dtype, args.trust_remote_code, args.uds_path
         )
     )
     server.serve(
-        model_id=args.model_id, revision=args.revision, dtype=args.dtype, uds_path=args.uds_path, sharded=args.sharded
+        model_id=args.model_id,
+        revision=args.revision,
+        sharded=args.sharded,
+        speculate=args.speculate,
+        dtype=args.dtype,
+        trust_remote_code=args.trust_remote_code,
+        uds_path=args.uds_path,
     )
 
 
@@ -23,7 +29,9 @@ if __name__ == "__main__":
     parser.add_argument("--model_id", type=str)
     parser.add_argument("--revision", type=str)
     parser.add_argument("--sharded", type=bool)
+    parser.add_argument("--speculate", type=int, default=None)
     parser.add_argument("--dtype", type=str)
+    parser.add_argument("--trust_remote_code", type=bool)
     parser.add_argument("--uds_path", type=Path)
     args = parser.parse_args()
     main(args)
diff --git a/server/text_generation_server/utils/awq/conversion_utils.py b/server/text_generation_server/utils/awq/conversion_utils.py
new file mode 100644
index 00000000..b19eafbb
--- /dev/null
+++ b/server/text_generation_server/utils/awq/conversion_utils.py
@@ -0,0 +1,97 @@
+import torch
+from typing import List
+
+
+AWQ_PACK_ORDER = [0, 2, 4, 6, 1, 3, 5, 7]
+REVERSE_AWQ_PACK_ORDER = [0, 4, 1, 5, 2, 6, 3, 7]
+
+
+def pack(imatrix: torch.Tensor, direction: str = "column"):
+    """
+    Packs a 4-bit integer matrix into a packed 32-bit integer matrix.
+    Args:
+        imatrix (torch.Tensor): matrix of integers
+        direction (str): direction of packing, either "column" or "row"
+    Returns:
+        qmatrix (torch.Tensor): packed matrix of integers
+    Raises:
+        ValueError: if `direction` is neither "column" nor "row"
+    """
+    if direction not in ("column", "row"):
+        raise ValueError(f'direction must be "column" or "row", got {direction!r}')
+    # One shift per nibble: eight 4-bit values share a single 32-bit word
+    shifts = torch.arange(0, 32, 4, dtype=torch.int32, device=imatrix.device)
+    imatrix = imatrix.to(torch.int8) & 0x0F  # eventually correct overflow
+    if direction == "column":
+        imatrix = imatrix.view(-1, imatrix.shape[1] // (32 // 4), (32 // 4))
+        qmatrix = torch.bitwise_left_shift(imatrix, shifts[None, None, :]).sum(dim=-1)
+    else:
+        imatrix = imatrix.view(imatrix.shape[0] // (32 // 4), (32 // 4), -1)
+        qmatrix = torch.bitwise_left_shift(imatrix, shifts[None, :, None]).sum(dim=1)
+
+    return qmatrix.to(torch.int32)
+
+
+def unpack(qmatrix: torch.Tensor, direction: str = "column"):
+    """
+    Unpacks a 32-bit packed integer matrix into a 4-bit integer matrix.
+    Args:
+        qmatrix (torch.Tensor): matrix of packed integers
+        direction (str): direction of unpacking, either "column" or "row"
+    Returns:
+        imatrix (torch.Tensor): matrix of integers
+    Raises:
+        ValueError: if `direction` is neither "column" nor "row"
+    """
+    if direction not in ("column", "row"):
+        raise ValueError(f'direction must be "column" or "row", got {direction!r}')
+    shifts = torch.arange(0, 32, 4, device=qmatrix.device)
+    if direction == "column":
+        imatrix = torch.bitwise_right_shift(
+            qmatrix[:, :, None], shifts[None, None, :]
+        ).view(qmatrix.shape[0], -1)
+    else:
+        imatrix = torch.bitwise_right_shift(
+            qmatrix[:, None, :], shifts[None, :, None]
+        ).view(-1, qmatrix.shape[-1])
+    # Only the low nibble of each shifted value is kept
+    return imatrix.to(torch.int8) & 0x0F  # eventually correct overflow
+
+
+def apply_order(
+    imatrix: torch.Tensor,
+    direction: str = "column",
+    order: List[int] = AWQ_PACK_ORDER,
+):
+    """
+    Applies the order to a 4-bit integer matrix.
+    Args:
+        imatrix (torch.Tensor): matrix of integers
+        direction (str): "column" or "row"; any other value raises ValueError
+        order (List[int]): order to apply, default is AWQ_PACK_ORDER
+    Returns:
+        imatrix (torch.Tensor): matrix of integers
+    """
+    if direction == "column":
+        # Permute the entries of every consecutive group of 32 // 4 values
+        return imatrix.view(-1, (32 // 4))[:, order].view(imatrix.shape)
+    if direction == "row":
+        return imatrix.view((32 // 4), -1)[order, :].view(imatrix.shape)
+    raise ValueError(f'direction must be "column" or "row", got {direction!r}')
+
+
+def fast_awq_to_gptq(qweight, qzeros):
+    """Repack AWQ `qweight` / `qzeros` and return them as `(qweight, qzeros)`."""
+    # awq uses column packing for both weights and zeros; unpack each one and
+    # undo the AWQ ordering with REVERSE_AWQ_PACK_ORDER
+    weights = unpack(qweight, direction="column")
+    weights = apply_order(weights, direction="column", order=REVERSE_AWQ_PACK_ORDER)
+    zeros = unpack(qzeros, direction="column")
+    zeros = apply_order(zeros, direction="column", order=REVERSE_AWQ_PACK_ORDER)
+    # Subtract 1 from the zeros (gptq adds 1 to the zeros)
+    zeros = zeros - 1
+    # exllama uses row packing for weights and column packing for zeros
+    packed_weight = pack(weights, direction="row")
+    packed_zeros = pack(zeros, direction="column")
+
+    return packed_weight, packed_zeros
diff --git a/server/text_generation_server/utils/convert.py b/server/text_generation_server/utils/convert.py
index 0b62f520..d9c3276b 100644
--- a/server/text_generation_server/utils/convert.py
+++ b/server/text_generation_server/utils/convert.py
@@ -68,7 +68,7 @@ def convert_file(pt_file: Path, sf_file: Path, discard_names: List[str]):
     Forcing us to check for potentially different keys during load when looking
     for specific tensors (making tensor sharing explicit).
     """
-    loaded = torch.load(pt_file, map_location="cpu")
+    loaded = torch.load(pt_file, map_location="cpu", weights_only=True)
     if "state_dict" in loaded:
         loaded = loaded["state_dict"]
     to_removes = _remove_duplicate_names(loaded, discard_names=discard_names)
diff --git a/server/text_generation_server/utils/flash_attn.py b/server/text_generation_server/utils/flash_attn.py
index aca95e11..45090c64 100644
--- a/server/text_generation_server/utils/flash_attn.py
+++ b/server/text_generation_server/utils/flash_attn.py
@@ -23,10 +23,15 @@ try:
     try:
         import flash_attn_2_cuda
     except ImportError:
+        architecture_suffix = ""
+        if IS_CUDA_SYSTEM:
+            architecture_suffix = "-cuda"
+        elif IS_ROCM_SYSTEM:
+            architecture_suffix = "-rocm"
         raise ImportError(
             "Flash Attention V2 is not installed.\n"
             "Use the official Docker image (ghcr.io/huggingface/text-generation-inference:latest) "
-            "or install flash attention v2 with `cd server && make install install-flash-attention-v2`"
+            f"or install flash attention v2 with `cd server && make install install-flash-attention-v2{architecture_suffix}`"
         )
     if not (is_sm8x or is_sm90):
         raise ImportError(
@@ -51,7 +56,9 @@ except ImportError as e:
         ) from e
     elif IS_ROCM_SYSTEM:
         for idx in range(torch.cuda.device_count()):
-            if "MI210" not in torch.cuda.get_device_name(idx) and "MI250" not in torch.cuda.get_device_name(idx):
+            if "MI210" not in torch.cuda.get_device_name(
+                idx
+            ) and "MI250" not in torch.cuda.get_device_name(idx):
                 raise ImportError(
                     f"AMD GPU {torch.cuda.get_device_name(idx)} does not support flash-attention"
                 )
@@ -70,6 +77,9 @@ def attention(
     softmax_scale,
     window_size_left=-1,
 ):
+    if window_size_left <= 0 and window_size_left != -1:
+        raise ValueError("`window_size_left` must be > 0 or -1")
+
     if HAS_FLASH_ATTN_V2_CUDA:
         return flash_attn_2_cuda.varlen_fwd(
             q,
@@ -78,6 +88,9 @@ def attention(
             out,
             cu_seqlens,
             cu_seqlens,
+            None,
+            None,
+            None,
             max_s,
             max_s,
             0.0,
@@ -91,8 +104,10 @@ def attention(
         )
     elif HAS_FLASH_ATTN_V2_ROCM:
         if window_size_left != -1:
-            raise ValueError(f"RoCm version of Flash Attention v2 does not support window attention (window_size_left != -1, got window_size_left={window_size_left}).")
-        
+            raise ValueError(
+                f"RoCm version of Flash Attention v2 does not support window attention (window_size_left != -1, got window_size_left={window_size_left})."
+            )
+
         # RoCm flash API does not take the window_size_left and window_size_right arguments.
         return flash_attn_2_cuda.varlen_fwd(
             q,
diff --git a/server/text_generation_server/utils/gptq/custom_autotune.py b/server/text_generation_server/utils/gptq/custom_autotune.py
index 17dff02e..1eb40f1e 100644
--- a/server/text_generation_server/utils/gptq/custom_autotune.py
+++ b/server/text_generation_server/utils/gptq/custom_autotune.py
@@ -88,9 +88,9 @@ class Autotuner(triton.KernelInterface):
             # In testings using only 40 reps seems to be close enough and it appears to be what PyTorch uses
             # PyTorch also sets fast_flush to True, but I didn't see any speedup so I'll leave the default
             return triton.testing.do_bench(
-                kernel_call, percentiles=(0.5, 0.2, 0.8), rep=40
+                kernel_call, quantiles=(0.5, 0.2, 0.8), rep=40
             )
-        except triton.compiler.OutOfResources:
+        except triton.OutOfResources:
             return (float("inf"), float("inf"), float("inf"))
 
     def run(self, *args, **kwargs):
diff --git a/server/text_generation_server/utils/gptq/exllama.py b/server/text_generation_server/utils/gptq/exllama.py
index 7353afb5..32f817db 100644
--- a/server/text_generation_server/utils/gptq/exllama.py
+++ b/server/text_generation_server/utils/gptq/exllama.py
@@ -37,19 +37,12 @@ def set_device(device):
     DEVICE = device
 
 
-def create_exllama_buffers():
+def create_exllama_buffers(max_total_tokens: int):
     global MAX_DQ, MAX_INNER, ACT_ORDER, DEVICE, TEMP_STATE, TEMP_DQ
 
     assert DEVICE is not None, "call set_device first"
 
-    if ACT_ORDER:
-        # TODO: this should be set to rust side `max_total_tokens`, but TGI
-        # does not offer an API to expose this variable to python, as this variable
-        # is handled by the client but it appears the model is initialized by the server.
-        # An alternative could be to initialize the buffers during warmup.
-        # Dummy
-        max_total_tokens = 2048
-    else:
+    if not ACT_ORDER:
         max_total_tokens = 1
 
     # This temp_state buffer is required to reorder X in the act-order case.
diff --git a/server/text_generation_server/utils/gptq/exllamav2.py b/server/text_generation_server/utils/gptq/exllamav2.py
index 1945338b..80836a95 100644
--- a/server/text_generation_server/utils/gptq/exllamav2.py
+++ b/server/text_generation_server/utils/gptq/exllamav2.py
@@ -1,50 +1,82 @@
 # Adapted from turboderp exllama: https://github.com/turboderp/exllamav2
 
-from logging import getLogger
-
 import torch
 import torch.nn as nn
-import math
 
-logger = getLogger(__name__)
+from loguru import logger
 
 try:
     from exllamav2_kernels import make_q_matrix, gemm_half_q_half
 except ImportError:
-    logger.error('exllamav2_kernels not installed.')
+    logger.error("exllamav2_kernels not installed.")
     raise
 
 # Dummy tensor to pass instead of g_idx since there is no way to pass "None" to a C++ extension
 none_tensor = torch.empty((1, 1), device="meta")
 
+
 def ext_gemm_half_q_half(x, q_handle, q4_width, force_cuda):
     """Matrix multiplication, returns x @ q4"""
     output_shape = x.shape[:-1] + (q4_width,)
     x = x.view(-1, x.shape[-1])
-    output = torch.empty((x.shape[0], q4_width), dtype = torch.half, device = x.device)
+    output = torch.empty((x.shape[0], q4_width), dtype=torch.half, device=x.device)
     gemm_half_q_half(x, q_handle, output, force_cuda)
     return output.view(output_shape)
 
+
+# Group map needed for irregular group sizes
+
+
+def make_group_map(q_groups, num_qrows):
+    """Expand `q_groups` into a flat table of (group index, countdown) pairs.
+
+    `q_groups` is read as interleaved (bits, row offset) entries and
+    `num_qrows` closes the last group; the result is a short tensor on the
+    same device as `q_groups`.
+    """
+    flat = q_groups.tolist()
+    starts = flat[1::2]
+    stops = starts[1:] + [num_qrows]
+    entries = []
+    for index, (bits, start, stop) in enumerate(zip(flat[0::2], starts, stops)):
+        # scale the row-offset span by 32 / bits (floored) to get the entry count
+        rows = (stop - start) * 32 // bits
+        for remaining in range(rows, 0, -1):
+            entries.extend((index, remaining))
+
+    return torch.tensor(entries, dtype=torch.short, device=q_groups.device)
+
+
+# Create Q matrix
+
+
 def ext_make_q_matrix(w: dict, temp_dq, key: str = None):
     """
-    Create Q matrix 
+    Create Q matrix
     """
     # EXL2
-    # won't work as the moment because the tensors are not the same. 
+    # won't work as the moment because the tensors are not the same.
     if "q_weight" in w:
         w["q_scale_max"] /= 256
         w["q_perm"] = w["q_perm"].short()
         w["q_invperm"] = w["q_invperm"].short()
-        return make_q_matrix(w["q_weight"],
-                                w["q_perm"],
-                                w["q_invperm"],
-                                w["q_scale"],
-                                w["q_scale_max"],
-                                w["q_groups"],
-                                none_tensor,
-                                none_tensor,
-                                none_tensor,
-                                temp_dq)
+
+        if "q_group_map" not in w:
+            w["q_group_map"] = make_group_map(w["q_groups"], w["q_weight"].shape[0])
+
+        return make_q_matrix(
+            w["q_weight"],
+            w["q_perm"],
+            w["q_invperm"],
+            w["q_scale"],
+            w["q_scale_max"],
+            w["q_groups"],
+            w["q_group_map"],
+            none_tensor,
+            none_tensor,
+            none_tensor,
+            temp_dq,
+        )
     # GPTQ
     elif "qweight" in w:
         if w["scales"].dtype == torch.float:
@@ -52,31 +84,42 @@ def ext_make_q_matrix(w: dict, temp_dq, key: str = None):
 
         # GPTQ with g_idx (act_order)
         if w.get("g_idx", None) is not None and not (w["g_idx"] == 0).all().item():
-            w["q_perm"] = torch.empty((w["qweight"].shape[0] * 8,), dtype = torch.short, device = w["qweight"].device)
+            w["q_perm"] = torch.empty(
+                (w["qweight"].shape[0] * 8,),
+                dtype=torch.short,
+                device=w["qweight"].device,
+            )
             w["q_invperm"] = torch.empty_like(w["q_perm"])
             # make_q4 segfaults if g_idx is not on cpu in the act-order case. In the non act-order case, None needs to be passed for g_idx.
-            return make_q_matrix(w["qweight"],
-                                 w["q_perm"],
-                                 w["q_invperm"],
-                                 none_tensor,
-                                 none_tensor,
-                                 none_tensor,
-                                 w["qzeros"],
-                                 w["scales"],
-                                 w["g_idx"].cpu(),
-                                 temp_dq)
+            return make_q_matrix(
+                w["qweight"],
+                w["q_perm"],
+                w["q_invperm"],
+                none_tensor,
+                none_tensor,
+                none_tensor,
+                none_tensor,
+                w["qzeros"],
+                w["scales"],
+                w["g_idx"].cpu(),
+                temp_dq,
+            )
         # GPTQ without g_idx
         else:
-            return make_q_matrix(w["qweight"],
-                                none_tensor,
-                                none_tensor,
-                                none_tensor,
-                                none_tensor,
-                                none_tensor,
-                                w["qzeros"],
-                                w["scales"],
-                                none_tensor,
-                                temp_dq)
+            return make_q_matrix(
+                w["qweight"],
+                none_tensor,
+                none_tensor,
+                none_tensor,
+                none_tensor,
+                none_tensor,
+                none_tensor,
+                w["qzeros"],
+                w["scales"],
+                none_tensor,
+                temp_dq,
+            )
+
 
 DEVICE = None
 FIXED_BYTES = 0
@@ -88,7 +131,7 @@ def set_device(device):
     DEVICE = device
 
 
-def create_exllama_buffers():
+def create_exllama_buffers(max_total_tokens: int):
     global FIXED_BYTES, LAYERS, DEVICE
     temp_dq = ExLlamaV2DeviceTensors(DEVICE, FIXED_BYTES)
 
@@ -106,14 +149,15 @@ class QuantLinear(nn.Module):
         super().__init__()
         if bits != 4:
             raise ValueError(
-                f"Exllamav2 kernel supports only bits=4, requested bits={bits}. Something is wrong in the model initialization.")
+                f"Exllamav2 kernel supports only bits=4, requested bits={bits}. Something is wrong in the model initialization."
+            )
         self.q_handle = None
         self.q_tensors = None
         self.bits = bits
-        self.maxq = 2 ** self.bits - 1
+        self.maxq = 2**self.bits - 1
         self.infeatures = qweight.shape[0] // self.bits * 32
         self.outfeatures = qweight.shape[1]
-        self.padding = - self.outfeatures % 32
+        self.padding = -self.outfeatures % 32
         self.outfeatures = self.outfeatures + self.padding
 
         self.device = qweight.device
@@ -124,14 +168,6 @@ class QuantLinear(nn.Module):
         self.bias = bias if bias is not None else None
         self.group_size = groupsize
 
-        infeatures = self.infeatures
-        outfeatures = self.outfeatures
-        assert qweight.shape == (infeatures // 32 * self.bits, outfeatures)
-        assert infeatures % self.group_size == 0
-        assert qzeros.shape == (infeatures // self.group_size, outfeatures // 32 * self.bits)
-        assert scales.shape == (infeatures // self.group_size, outfeatures)
-        assert g_idx.shape == (infeatures, ), f"{g_idx.shape}, {infeatures}"
-
         global FIXED_BYTES, LAYERS
         FIXED_BYTES = max(FIXED_BYTES, self.scratch_space_fixed())
         LAYERS.append(self)
@@ -140,33 +176,35 @@ class QuantLinear(nn.Module):
         assert self.qweight.device.type == "cuda"
         assert self.qweight.device.index is not None
         self.q_tensors = {
-            "qweight":self.qweight,
-            "qzeros":self.qzeros,
-            "scales":self.scales,
-            "g_idx":self.g_idx
+            "qweight": self.qweight,
+            "qzeros": self.qzeros,
+            "scales": self.scales,
+            "g_idx": self.g_idx,
         }
         temp_dq = temp_dq.get_scratch_slice(self.temp_dq_size())
-        self.q_handle = ext_make_q_matrix(
-            self.q_tensors, temp_dq
-        )
-    
-    def forward(self, x, force_cuda = False):
+
+        # We NEED to keep a pointer on Python side, otherwise the garbage collector will mess with us,
+        # and `Memory access fault by GPU node-2` will EAT you.
+        self.temp_dq = temp_dq
+        self.q_handle = ext_make_q_matrix(self.q_tensors, temp_dq)
+
+    def forward(self, x, force_cuda=False):
         output = ext_gemm_half_q_half(x, self.q_handle, self.outfeatures, force_cuda)
 
         if self.bias is not None:
             output.add_(self.bias)
         return output
-    
+
     def temp_dq_size(self):
         return self.infeatures * self.outfeatures * 2 + 128
-    
+
     def temp_fwd_size(self, max_input_len, max_batch_size):
         return self.outfeatures * max_input_len * max_batch_size * 4 + 128
-    
+
     def scratch_space_fixed(self, max_input_len=4096, max_batch_size=16):
         return self.temp_dq_size() + self.temp_fwd_size(max_input_len, max_batch_size)
-               
-    
+
+
 class ExLlamaV2DeviceTensors:
 
     device_idx: int
@@ -177,13 +215,16 @@ class ExLlamaV2DeviceTensors:
     def __init__(self, device, scratch_bytes):
         self.device = device
         self.scratch_bytes = scratch_bytes
-    
+
     def prepare(self):
-        self.scratch = torch.empty((self.scratch_bytes // 2,), dtype = torch.half, device = self.device)
+        self.scratch = torch.empty(
+            (self.scratch_bytes // 2,), dtype=torch.half, device=self.device
+        )
 
     def get_scratch_slice(self, size_bytes):
 
-        if self.scratch is None: self.prepare()
+        if self.scratch is None:
+            self.prepare()
 
         size_bytes = ((size_bytes + 127) // 128) * 128
         size_half = size_bytes // 2
diff --git a/server/text_generation_server/utils/gptq/quant_linear.py b/server/text_generation_server/utils/gptq/quant_linear.py
index bfc91c00..a832f755 100644
--- a/server/text_generation_server/utils/gptq/quant_linear.py
+++ b/server/text_generation_server/utils/gptq/quant_linear.py
@@ -182,7 +182,7 @@ try:
             )  # (BLOCK_SIZE_K, BLOCK_SIZE_N,)
 
             zeros = (zeros >> zeros_shifter[None, :]) & maxq
-            zeros = zeros + 1
+            zeros = (zeros + 1) & maxq  # eventually avoid overflow
 
             a = tl.load(a_ptrs, mask=a_mask, other=0.0)  # (BLOCK_SIZE_M, BLOCK_SIZE_K)
             b = tl.load(b_ptrs)  # (BLOCK_SIZE_K, BLOCK_SIZE_N), but repeated
diff --git a/server/text_generation_server/utils/hub.py b/server/text_generation_server/utils/hub.py
index 23743c9b..a81e659d 100644
--- a/server/text_generation_server/utils/hub.py
+++ b/server/text_generation_server/utils/hub.py
@@ -6,24 +6,33 @@ from loguru import logger
 from pathlib import Path
 from typing import Optional, List
 
-from huggingface_hub import HfApi, hf_hub_download
+from huggingface_hub import file_download, hf_api, HfApi, hf_hub_download
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
 from huggingface_hub.utils import (
     LocalEntryNotFoundError,
     EntryNotFoundError,
-    RevisionNotFoundError,  # Import here to ease try/except in other part of the lib
+    RevisionNotFoundError,  # noqa # Import here to ease try/except in other part of the lib
 )
 
 WEIGHTS_CACHE_OVERRIDE = os.getenv("WEIGHTS_CACHE_OVERRIDE", None)
+HF_HUB_OFFLINE = os.environ.get("HF_HUB_OFFLINE", "0").lower() in ["true", "1", "yes"]
 
 
-def weight_hub_files(
-    model_id: str, revision: Optional[str] = None, extension: str = ".safetensors"
+def _cached_weight_files(
+    model_id: str, revision: Optional[str], extension: str
 ) -> List[str]:
-    """Get the weights filenames on the hub"""
-    api = HfApi()
-    info = api.model_info(model_id, revision=revision)
-    filenames = [
+    """Guess weight files from the cached revision snapshot directory"""
+    d = _get_cached_revision_directory(model_id, revision)
+    if not d:
+        return []
+    filenames = _weight_files_from_dir(d, extension)
+    return filenames
+
+
+def _weight_hub_files_from_model_info(
+    info: hf_api.ModelInfo, extension: str
+) -> List[str]:
+    return [
         s.rfilename
         for s in info.siblings
         if s.rfilename.endswith(extension)
@@ -31,26 +40,37 @@ def weight_hub_files(
         and "arguments" not in s.rfilename
         and "args" not in s.rfilename
         and "training" not in s.rfilename
+        and "medusa_lm_head" not in s.rfilename
     ]
 
-    if not filenames:
-        raise EntryNotFoundError(
-            f"No {extension} weights found for model {model_id} and revision {revision}.",
-            None,
-        )
 
+def _weight_files_from_dir(d: Path, extension: str) -> List[str]:
+    """List weight files directly inside `d` (depth 1, like the Hub listing).
+
+    Returns joined paths of files ending in `extension`, skipping
+    argument/adapter/training/medusa files; [] if `d` cannot be walked.
+    """
+    excluded = ("arguments", "args", "adapter", "training", "medusa_lm_head")
+    # os.walk yields nothing for a missing/unreadable dir: default to no files
+    # instead of leaking StopIteration out of next().
+    root, _, files = next(os.walk(str(d)), (str(d), [], []))
+    filenames = [
+        os.path.join(root, f)
+        for f in files
+        if f.endswith(extension) and not any(word in f for word in excluded)
+    ]
     return filenames
 
 
-def try_to_load_from_cache(
-    model_id: str, revision: Optional[str], filename: str
+def _get_cached_revision_directory(
+    model_id: str, revision: Optional[str]
 ) -> Optional[Path]:
-    """Try to load a file from the Hugging Face cache"""
     if revision is None:
         revision = "main"
 
-    object_id = model_id.replace("/", "--")
-    repo_cache = Path(HUGGINGFACE_HUB_CACHE) / f"models--{object_id}"
+    repo_cache = Path(HUGGINGFACE_HUB_CACHE) / Path(
+        file_download.repo_folder_name(repo_id=model_id, repo_type="model")
+    )
 
     if not repo_cache.is_dir():
         # No cache for this model
@@ -74,8 +94,42 @@ def try_to_load_from_cache(
         # No cache for this revision and we won't try to return a random revision
         return None
 
+    return snapshots_dir / revision
+
+
+def weight_hub_files(
+    model_id: str, revision: Optional[str] = None, extension: str = ".safetensors"
+) -> List[str]:
+    """Return the weight filenames of `model_id` at `revision`.
+
+    Reads the cached snapshot when HF_HUB_OFFLINE is set, the Hub model info
+    otherwise; raises EntryNotFoundError if nothing matches `extension`.
+    """
+    api = HfApi()
+    if not HF_HUB_OFFLINE:
+        model_info = api.model_info(model_id, revision=revision)
+        found = _weight_hub_files_from_model_info(model_info, extension)
+    else:
+        found = _cached_weight_files(model_id, revision, extension)
+    if found:
+        return found
+    raise EntryNotFoundError(
+        f"No {extension} weights found for model {model_id} and revision {revision}.",
+        None,
+    )
+
+
+def try_to_load_from_cache(
+    model_id: str, revision: Optional[str], filename: str
+) -> Optional[Path]:
+    """Try to load a file from the Hugging Face cache"""
+
+    d = _get_cached_revision_directory(model_id, revision)
+    if not d:
+        return None
+
     # Check if file exists in cache
-    cached_file = snapshots_dir / revision / filename
+    cached_file = d / filename
     return cached_file if cached_file.is_file() else None
 
 
@@ -84,13 +138,14 @@ def weight_files(
 ) -> List[Path]:
     """Get the local files"""
     # Local model
-    if Path(model_id).exists() and Path(model_id).is_dir():
-        local_files = list(Path(model_id).glob(f"*{extension}"))
+    d = Path(model_id)
+    if d.exists() and d.is_dir():
+        local_files = _weight_files_from_dir(d, extension)
         if not local_files:
             raise FileNotFoundError(
                 f"No local weights found in {model_id} with extension {extension}"
             )
-        return local_files
+        return [Path(f) for f in local_files]
 
     try:
         filenames = weight_hub_files(model_id, revision, extension)
@@ -138,33 +193,33 @@ def download_weights(
 ) -> List[Path]:
     """Download the safetensors files from the hub"""
 
-    def download_file(filename, tries=5, backoff: int = 5):
-        local_file = try_to_load_from_cache(model_id, revision, filename)
+    def download_file(fname, tries=5, backoff: int = 5):
+        local_file = try_to_load_from_cache(model_id, revision, fname)
         if local_file is not None:
-            logger.info(f"File {filename} already present in cache.")
+            logger.info(f"File {fname} already present in cache.")
             return Path(local_file)
 
-        for i in range(tries):
+        for idx in range(tries):
             try:
-                logger.info(f"Download file: {filename}")
-                start_time = time.time()
+                logger.info(f"Download file: {fname}")
+                stime = time.time()
                 local_file = hf_hub_download(
-                    filename=filename,
+                    filename=fname,
                     repo_id=model_id,
                     revision=revision,
-                    local_files_only=False,
+                    local_files_only=HF_HUB_OFFLINE,
                 )
                 logger.info(
-                    f"Downloaded {local_file} in {timedelta(seconds=int(time.time() - start_time))}."
+                    f"Downloaded {local_file} in {timedelta(seconds=int(time.time() - stime))}."
                 )
                 return Path(local_file)
             except Exception as e:
-                if i + 1 == tries:
+                if idx + 1 == tries:
                     raise e
                 logger.error(e)
                 logger.info(f"Retrying in {backoff} seconds")
                 time.sleep(backoff)
-                logger.info(f"Retry {i + 1}/{tries - 1}")
+                logger.info(f"Retry {idx + 1}/{tries - 1}")
 
     # We do this instead of using tqdm because we want to parse the logs with the launcher
     start_time = time.time()
diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index a93ccd0e..9cf5c80f 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -4,7 +4,7 @@ import torch.distributed
 
 from torch import nn
 from torch.nn import functional as F
-from typing import List
+from typing import List, Tuple, Optional
 from loguru import logger
 from functools import lru_cache
 
@@ -18,7 +18,7 @@ except ImportError:
 from accelerate import init_empty_weights
 
 from text_generation_server.utils.gptq.quant_linear import QuantLinear
-from text_generation_server.utils.import_utils import IS_CUDA_SYSTEM, IS_ROCM_SYSTEM 
+from text_generation_server.utils.import_utils import IS_CUDA_SYSTEM, IS_ROCM_SYSTEM
 
 HAS_AWQ = True
 try:
@@ -32,34 +32,33 @@ except Exception:
     major = 1
 
 HAS_EXLLAMA = False
-CAN_EXLLAMA = major >= 8
+CAN_EXLLAMA = major >= 8 or IS_ROCM_SYSTEM
 V2 = os.getenv("EXLLAMA_VERSION", "2") == "2"
-if V2 and int(os.getenv("WORLD_SIZE", "1")) > 1:
-    logger.warning("Disabling exllama v2 and using v1 instead because there are issues when sharding")
-    V2 = False
 
 if os.getenv("DISABLE_EXLLAMA") == "True":
     HAS_EXLLAMA = False
 elif CAN_EXLLAMA:
     try:
         if V2:
-            from text_generation_server.utils.gptq.exllamav2 import (QuantLinear as ExllamaQuantLinear, 
-                    create_exllama_buffers,
-                    set_device,
-                                                                     )
+            from text_generation_server.utils.gptq.exllamav2 import (
+                QuantLinear as ExllamaQuantLinear,
+                create_exllama_buffers,
+                set_device,
+            )
+
             HAS_EXLLAMA = "2"
         else:
-            from text_generation_server.utils.gptq.exllama import (Ex4bitLinear as ExllamaQuantLinear,
-                    create_exllama_buffers,
-                    set_device,
-                )
+            from text_generation_server.utils.gptq.exllama import (
+                Ex4bitLinear as ExllamaQuantLinear,
+                create_exllama_buffers,
+                set_device,
+            )
+
             HAS_EXLLAMA = "1"
 
     except ImportError:
         pass
 
-from typing import Optional
-
 HAS_EETQ = False
 try:
     from EETQ import quant_weights, w8_a16_gemm
@@ -168,6 +167,8 @@ class EETQLinear(nn.Module):
     ) -> None:
         super().__init__()
         device = weight.device
+        if weight.dtype != torch.float16:
+            weight = weight.to(dtype=torch.float16)
         weight = torch.t(weight).contiguous().cpu()
         weight, scale = quant_weights(weight, torch.int8, False)
 
@@ -181,6 +182,48 @@ class EETQLinear(nn.Module):
         return output
 
 
+def fp8_quantize(weight, qdtype=torch.float8_e4m3fn):
+    """Quantize `weight` to `qdtype` with a single per-tensor scale.
+
+    Returns `(qweight, scale)`: the float8 tensor and the inverse scale as
+    float32, the two inputs `torch._scaled_mm` needs.
+    """
+    finfo = torch.finfo(qdtype)
+    # Map the absolute maximum onto the dtype maximum; the clamp on the
+    # denominator avoids dividing by zero for an all-zero tensor.
+    scale = finfo.max / weight.abs().max().clamp(min=1e-12)
+    # Saturate explicitly before casting (the default cast is unsaturated).
+    qweight = (weight * scale).clamp(min=finfo.min, max=finfo.max)
+    qweight = qweight.to(qdtype)
+    inverse_scale = scale.float().reciprocal()
+    return qweight, inverse_scale
+
+
+class Fp8Linear(nn.Module):
+    """Linear layer whose weight is quantized to float8 once at construction."""
+
+    def __init__(self, weight, bias) -> None:
+        super().__init__()
+        # forward() emits results in the dtype the weight had before quantization.
+        self.dtype = weight.dtype
+        quantized, inverse_scale = fp8_quantize(weight)
+        self.qweight = quantized
+        self.scale = inverse_scale
+        self.bias = bias
+
+    def forward(self, input: torch.Tensor) -> torch.Tensor:
+        """Quantize `input` on the fly and multiply it by the transposed weight."""
+        input_q, input_scale = fp8_quantize(input)
+        scaled_mm_kwargs = dict(
+            out_dtype=self.dtype,
+            scale_a=input_scale,
+            scale_b=self.scale,
+            bias=self.bias,
+        )
+        output, _ = torch._scaled_mm(input_q, self.qweight.t(), **scaled_mm_kwargs)
+        return output
+
+
 class Linear8bitLt(nn.Module):
     def __init__(
         self,
@@ -292,6 +335,8 @@ def get_linear(weight, bias, quantize):
             raise ImportError(
                 "Please install EETQ from https://github.com/NetEase-FuXi/EETQ"
             )
+    elif quantize == "fp8":
+        linear = Fp8Linear(weight, bias)
     elif quantize == "bitsandbytes":
         warn_deprecate_bnb()
         linear = Linear8bitLt(
@@ -323,7 +368,9 @@ def get_linear(weight, bias, quantize):
             )
 
         if use_exllama:
-            linear = ExllamaQuantLinear(qweight, qzeros, scales, g_idx, bias, bits, groupsize)
+            linear = ExllamaQuantLinear(
+                qweight, qzeros, scales, g_idx, bias, bits, groupsize
+            )
         else:
             linear = QuantLinear(
                 qweight,
@@ -341,6 +388,15 @@ def get_linear(weight, bias, quantize):
             raise NotImplementedError(
                 f"The passed weight is not `awq` compatible, loader needs to be updated."
             )
+        if IS_ROCM_SYSTEM:
+            raise NotImplementedError(
+                "AWQ GEMM kernel can't be used on ROCm systems, please use `--quantize gptq` instead "
+                "to use Exllama/GPTQ kernels for AWQ inference."
+            )
+        if not HAS_AWQ:
+            raise NotImplementedError(
+                "You do not seem to have awq installed, either install it (cd server && make install-awq), or try using GPTQ `--quantize gptq` a conversion AWQ->GPTQ will happen on the fly"
+            )
         linear = WQLinear(
             w_bit=bits,
             group_size=groupsize,
@@ -363,6 +419,214 @@ class SuperLayer(nn.Module):
         return self.linear.forward(x)
 
 
+class ResBlock(torch.nn.Module):  # residual block computing x + SiLU(linear(x))
+    def __init__(self, config, prefix, weights):
+        super().__init__()
+        self.linear = FastLinear.load(  # weights are read from `{prefix}.linear`, with a bias
+            config, prefix=f"{prefix}.linear", weights=weights, bias=True
+        )
+        self.act = torch.nn.SiLU()
+
+    def forward(self, x):
+        return x + self.act(self.linear(x))  # skip connection around the activated projection
+
+
+class MedusaModel(torch.nn.Module):  # holds `medusa_num_heads` MedusaHead modules applied to the same input
+    def __init__(self, config, medusa_config, weights):
+        super().__init__()
+        self.heads = torch.nn.ModuleList(
+            [
+                MedusaHead(config, medusa_config, prefix=f"{i}", weights=weights)  # head i uses weight prefix "{i}"
+                for i in range(medusa_config["medusa_num_heads"])
+            ]
+        )
+
+    def forward(self, x):
+        speculative_logits = torch.stack([head(x) for head in self.heads], dim=1)  # head outputs stacked on a new dim 1
+        return speculative_logits
+
+
+class MedusaHead(torch.nn.Module):  # `medusa_num_layers` ResBlocks followed by a bias-free output projection
+    def __init__(self, config, medusa_config, prefix, weights):
+        super().__init__()
+        self.blocks = torch.nn.ModuleList(
+            [
+                ResBlock(config, prefix=f"{prefix}.{i}", weights=weights)  # block i is read from `{prefix}.{i}`
+                for i in range(medusa_config["medusa_num_layers"])
+            ]
+        )
+        n = len(self.blocks)  # the output projection uses the next index after the blocks
+        self.out = FastLinear.load(
+            config, prefix=f"{prefix}.{n}", weights=weights, bias=False
+        )
+
+    def forward(self, x):
+        for block in self.blocks:  # apply the residual blocks in order
+            x = block(x)
+        x = self.out(x)
+        return x
+
+
+class MedusaHeadV1(nn.Module):  # pairs a regular LM head with a MedusaModel fed the same input
+    def __init__(self, lm_head, medusa):
+        super().__init__()
+        self.lm_head = lm_head
+        self.medusa = medusa
+
+    @staticmethod
+    def load(config, prefix: str, weights):  # builds both heads from files under `config.use_medusa`
+        from pathlib import Path
+        from safetensors import safe_open
+        import json
+
+        use_medusa = config.use_medusa  # used below as a directory path
+
+        medusa_config = str(Path(use_medusa) / "config.json")
+        filename = str(Path(use_medusa) / "medusa_lm_head.safetensors")
+
+        with open(medusa_config, "r") as f:
+            medusa_config = json.load(f)  # rebinds the name from the path string to the parsed JSON
+        routing = weights.routing
+        with safe_open(filename, framework="pytorch") as f:
+            for k in f.keys():  # map every tensor name in the medusa file to that file in weights.routing
+                if k in routing and routing[k] != filename:
+                    raise RuntimeError(
+                        f"Key {k} was found in multiple files: {filename} and {routing[k]}"
+                    )
+                routing[k] = filename
+
+        medusa = MedusaModel(config, medusa_config, weights)
+        lm_head = TensorParallelHead.load(config, prefix, weights)
+        return MedusaHeadV1(lm_head, medusa)
+
+    def forward(
+        self, input: torch.Tensor
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:  # returns (logits, speculative_logits or None)
+        logits = self.lm_head(input)
+        # If we have too many tokens, we skip speculative logits
+        if input.shape[0] > 128:
+            return logits, None
+
+        speculative_logits = self.medusa(input)
+        return logits, speculative_logits
+
+
+class MedusaHeadV2(nn.Module):  # medusa heads fused into one column-parallel linear, sharing a single lm_head
+    def __init__(self, config, prefix, weights):
+        super().__init__()
+        from pathlib import Path
+        from safetensors import safe_open
+        import json
+
+        use_medusa = config.use_medusa  # used below as a directory path
+
+        medusa_config = str(Path(use_medusa) / "config.json")
+        filename = str(Path(use_medusa) / "medusa_lm_head.safetensors")
+
+        with open(medusa_config, "r") as f:
+            medusa_config = json.load(f)  # rebinds the name from the path string to the parsed JSON
+        routing = weights.routing
+        with safe_open(filename, framework="pytorch") as f:
+            for k in f.keys():  # map every tensor name in the medusa file to that file in weights.routing
+                if k in routing and routing[k] != filename:
+                    raise RuntimeError(
+                        f"Key {k} was found in multiple files: {filename} and {routing[k]}"
+                    )
+                routing[k] = filename
+
+        self.n_medusa_heads = medusa_config["medusa_num_heads"]
+
+        assert medusa_config["medusa_num_layers"] == 1  # only single-layer heads are handled by this class
+        self.linear = TensorParallelColumnLinear.load_multi(
+            config,
+            prefixes=[f"{i}.0.linear" for i in range(self.n_medusa_heads)],  # layer 0 of every head
+            dim=0,
+            weights=weights,
+            bias=True,
+        )
+        self.process_group = weights.process_group
+        self.world_size = self.process_group.size()
+        self.rank = self.process_group.rank()
+
+        self.act = torch.nn.SiLU()
+
+        self.lm_head = TensorParallelHead.load(config, prefix, weights)
+
+    def forward(self, x):  # returns (logits, speculative_logits or None)
+        # If we have too many tokens, we skip speculative logits
+        if x.shape[0] > 128:
+            logits = self.lm_head(x)
+            return logits, None
+
+        size = x.shape[-1]
+        block_size = (size + self.world_size - 1) // self.world_size  # ceil(size / world_size)
+        start = self.rank * block_size
+        stop = (self.rank + 1) * block_size
+
+        x_block = x[:, start:stop]  # this rank's slice of the last dimension of x
+
+        # Compute all medusa heads at the same time, then reshape and move the n_medusa_heads dim to dim 1
+        medusa_res = self.act(self.linear(x)).reshape(
+            *x_block.shape[:-1], self.n_medusa_heads, x_block.shape[-1]
+        )
+
+        # Apply all residual medusa heads
+        output = x[:, start:stop].unsqueeze(-2) + medusa_res  # same slice as x_block, broadcast over the heads dim
+
+        # Gather medusa heads
+        world_output = [
+            torch.empty_like(output) for _ in range(self.process_group.size())
+        ]
+        torch.distributed.all_gather(world_output, output, group=self.process_group)
+        world_output = torch.cat(world_output, dim=-1)  # rejoin the per-rank slices along the last dim
+
+        # Stack x and medusa residual x
+        stacked_x = torch.cat([x.unsqueeze(-2), world_output], dim=-2)
+
+        # Compute lm head on x + medusa residual x
+        logits = self.lm_head(stacked_x)
+
+        # Finally, split logits from speculative logits
+        logits, speculative_logits = torch.split(
+            logits, [1, self.n_medusa_heads], dim=-2
+        )
+        # Squeeze added dimension
+        logits = logits.squeeze(-2)
+
+        return logits, speculative_logits
+
+
+class SpeculativeHead(nn.Module):  # LM head that optionally also returns Medusa speculative logits
+    def __init__(self, lm_head, medusa):
+        super().__init__()
+        self.head = lm_head  # plain LM head; None when a medusa head is used
+        self.medusa = medusa  # MedusaHeadV1 / MedusaHeadV2 instance, or None
+
+    @staticmethod
+    def load(config, prefix: str, weights):  # picks the head variant based on config.use_medusa
+        use_medusa = config.use_medusa
+        if use_medusa:
+            lm_head = None  # both medusa variants load their own lm_head
+            try:
+                medusa = MedusaHeadV1.load(config, prefix, weights)
+            except Exception:  # V1 load failed: fall back to V2, but let KeyboardInterrupt/SystemExit propagate
+                medusa = MedusaHeadV2(config, prefix, weights)
+        else:
+            lm_head = TensorParallelHead.load(config, prefix, weights)
+            medusa = None
+        return SpeculativeHead(lm_head, medusa)
+
+    def forward(
+        self, input: torch.Tensor
+    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:  # returns (logits, speculative_logits or None)
+        if self.medusa is not None:
+            return self.medusa(input)
+
+        assert self.head is not None
+        logits = self.head(input)
+        return logits, None
+
+
 class TensorParallelHead(SuperLayer):
     def __init__(self, linear, process_group, should_gather: bool):
         super().__init__(linear)
@@ -481,9 +745,9 @@ class TensorParallelRowLinear(SuperLayer):
             process_group=weights.process_group,
         )
 
-    def forward(self, input: torch.Tensor) -> torch.Tensor:
+    def forward(self, input: torch.Tensor, reduce: bool = True) -> torch.Tensor:
         out = super().forward(input)
-        if self.process_group.size() > 1:
+        if self.process_group.size() > 1 and reduce:
             torch.distributed.all_reduce(out, group=self.process_group)
         return out
 
@@ -499,10 +763,12 @@ class TensorParallelEmbedding(nn.Module):
         world_size = process_group.size()
         rank = process_group.rank()
 
-        block_size = num_embeddings // world_size
+        block_size = (num_embeddings + world_size - 1) // world_size
         self.min_id = rank * block_size
         self.max_id = min(num_embeddings, (rank + 1) * block_size)
-        self.null_idx = block_size
+        self.null_idx = weight.shape[
+            0
+        ]  # Usually block_size, might be less in non even vocab_size.
         self.process_group = weights.process_group
         self.reduce = reduce
 
@@ -526,6 +792,8 @@ class TensorParallelEmbedding(nn.Module):
 try:
     if IS_CUDA_SYSTEM:
         import dropout_layer_norm
+    elif IS_ROCM_SYSTEM:
+        from vllm import layernorm_ops
     else:
         dropout_layer_norm = None
 
@@ -563,10 +831,85 @@ try:
                     residual = hidden_states
 
                 return normed_hidden_states, residual
+
+    class FastRMSNorm(nn.Module):  # RMSNorm with optional residual add; picks a kernel per platform
+        def __init__(self, weight: torch.Tensor, eps: float):
+            super().__init__()
+
+            self.weight = nn.Parameter(weight)
+            self.variance_epsilon = eps
+
+        @classmethod
+        def load(cls, prefix, weights, eps=1e-6):  # reads the `{prefix}.weight` tensor from `weights`
+            weight = weights.get_tensor(f"{prefix}.weight")
+            return cls(weight, eps)
+
+        def forward(self, hidden_states, residual=None):  # returns (normed_hidden_states, residual)
+            if hidden_states.shape[-1] > 8192:  # pure-PyTorch path for last-dim sizes above 8192
+                if residual is not None:
+                    hidden_states += residual  # in-place add: the caller's tensor is modified
+                residual = hidden_states
+
+                hidden_states = hidden_states.to(torch.float32)
+                variance = hidden_states.pow(2).mean(-1, keepdim=True)
+                hidden_states = hidden_states * torch.rsqrt(
+                    variance + self.variance_epsilon
+                )
+
+                # convert into half-precision if necessary
+                if self.weight.dtype in [torch.float16, torch.bfloat16]:
+                    hidden_states = hidden_states.to(self.weight.dtype)
+
+                return self.weight * hidden_states, residual
+            elif IS_CUDA_SYSTEM:
+                # faster post attention rms norm
+                (
+                    normed_hidden_states,
+                    res,
+                    *rest,
+                ) = dropout_layer_norm.dropout_add_ln_fwd(
+                    hidden_states,
+                    residual,
+                    self.weight,
+                    None,
+                    None,
+                    None,
+                    None,
+                    None,
+                    0.0,
+                    self.variance_epsilon,
+                    1.0,
+                    0,
+                    None,
+                    False,
+                    True,  # Activate RMSNorm
+                )
+                if res is None:  # kernel returned no residual: use the input as the residual
+                    res = hidden_states
+
+                return normed_hidden_states, res
+            elif IS_ROCM_SYSTEM:
+                # We use VLLM RMSNorm kernel that can be compiled for ROCm, instead of Flash Attention ones that can not.
+                if residual is not None:
+                    hidden_states += residual  # in-place add: the caller's tensor is modified
+                residual = hidden_states
+
+                out = torch.empty_like(hidden_states)
+                layernorm_ops.rms_norm(  # writes its result into `out`
+                    out,
+                    hidden_states,
+                    self.weight.data,
+                    self.variance_epsilon,
+                )
+                return out, residual
+            else:
+                raise ValueError(
+                    "Your system seem to be not supported. Please check your install or open an issue at https://github.com/huggingface/text-generation-inference/issues with a clear reproduction."
+                )
+
 except ImportError:
     pass
 
-
 try:
     if IS_CUDA_SYSTEM:
         from flash_attn.layers.rotary import RotaryEmbedding
@@ -601,7 +944,13 @@ try:
             self.scaling_factor = scaling_factor
             self.dynamic_args = None
 
-        def forward(self, query: torch.Tensor, key: torch.Tensor, cos: torch.Tensor, sin: torch.Tensor):
+        def forward(
+            self,
+            query: torch.Tensor,
+            key: torch.Tensor,
+            cos: torch.Tensor,
+            sin: torch.Tensor,
+        ):
             # Such controlflows may add some overhead.
             if IS_CUDA_SYSTEM:
                 rotary_dim = cos.shape[-1]
@@ -621,16 +970,11 @@ try:
                 head_size = query.shape[-1]
 
                 # Inplace operation, updating query and key.
-                pos_encoding_ops.rotary_embedding(
-                    query,
-                    key,
-                    head_size,
-                    cos,
-                    sin,
-                    True
-                )
+                pos_encoding_ops.rotary_embedding(query, key, head_size, cos, sin, True)
             else:
-                raise ValueError("Your system seem to be not supported. Please check your install or open an issue at https://github.com/huggingface/text-generation-inference/issues with a clear reproduction.")
+                raise ValueError(
+                    "Your system seem to be not supported. Please check your install or open an issue at https://github.com/huggingface/text-generation-inference/issues with a clear reproduction."
+                )
 
         @classmethod
         def static(cls, config, dim, base, device):
@@ -652,15 +996,16 @@ try:
                 elif rope_scaling["type"] == "yarn":
                     return YarnPositionRotaryEmbedding(
                         dim=2 * inv_freq.shape[0],
-                        max_position_embeddings=rope_scaling["original_max_position_embeddings"],
+                        max_position_embeddings=rope_scaling[
+                            "original_max_position_embeddings"
+                        ],
                         base=10000.0,
                         device=inv_freq.device,
                         scaling_factor=scaling_factor,
                         extrapolation_factor=1,
                         attn_factor=1,
                         beta_fast=32,
-                        beta_slow=1
-
+                        beta_slow=1,
                     )
                 else:
                     raise NotImplementedError(
@@ -693,15 +1038,16 @@ try:
                 elif rope_scaling["type"] == "yarn":
                     return YarnPositionRotaryEmbedding(
                         dim=2 * inv_freq.shape[0],
-                        max_position_embeddings=rope_scaling["original_max_position_embeddings"],
+                        max_position_embeddings=rope_scaling[
+                            "original_max_position_embeddings"
+                        ],
                         base=10000.0,
                         device=inv_freq.device,
                         scaling_factor=scaling_factor,
                         extrapolation_factor=1,
                         attn_factor=1,
                         beta_fast=32,
-                        beta_slow=1
-
+                        beta_slow=1,
                     )
                 else:
                     raise NotImplementedError(
@@ -755,7 +1101,7 @@ try:
             self.max_position_embeddings = max_position_embeddings
             self.base = base
 
-        def _update_cos_sin_cache(self, dtype, device, seqlen):            
+        def _update_cos_sin_cache(self, dtype, device, seqlen):
             # Reset the tables if the sequence length has changed,
             # or if we're on a new device (possibly due to tracing for instance)
             if (
@@ -780,19 +1126,27 @@ try:
                 self._cos_cached = torch.cos(freqs).to(dtype)
                 self._sin_cached = torch.sin(freqs).to(dtype)
 
-
     # Inverse dim formula to find dim based on number of rotations
     import math
-    def find_correction_dim(num_rotations, dim, base=10000, max_position_embeddings=2048):
-        return (dim * math.log(max_position_embeddings/(num_rotations * 2 * math.pi)))/(2 * math.log(base))
+
+    def find_correction_dim(
+        num_rotations, dim, base=10000, max_position_embeddings=2048
+    ):
+        return (
+            dim * math.log(max_position_embeddings / (num_rotations * 2 * math.pi))
+        ) / (2 * math.log(base))
 
     # Find dim range bounds based on rotations
-    def find_correction_range(low_rot, high_rot, dim, base=10000, max_position_embeddings=2048):
-        low = math.floor(find_correction_dim(
-            low_rot, dim, base, max_position_embeddings))
-        high = math.ceil(find_correction_dim(
-            high_rot, dim, base, max_position_embeddings))
-        return max(low, 0), min(high, dim-1)  # Clamp values just in case
+    def find_correction_range(
+        low_rot, high_rot, dim, base=10000, max_position_embeddings=2048
+    ):
+        low = math.floor(
+            find_correction_dim(low_rot, dim, base, max_position_embeddings)
+        )
+        high = math.ceil(
+            find_correction_dim(high_rot, dim, base, max_position_embeddings)
+        )
+        return max(low, 0), min(high, dim - 1)  # Clamp values just in case
 
     def linear_ramp_mask(min, max, dim):
         if min == max:
@@ -808,7 +1162,19 @@ try:
         return 0.1 * math.log(scale) + 1.0
 
     class YarnPositionRotaryEmbedding(PositionRotaryEmbedding):
-        def __init__(self, dim, max_position_embeddings, base, device, scaling_factor,*, extrapolation_factor, attn_factor, beta_fast, beta_slow):
+        def __init__(
+            self,
+            dim,
+            max_position_embeddings,
+            base,
+            device,
+            scaling_factor,
+            *,
+            extrapolation_factor,
+            attn_factor,
+            beta_fast,
+            beta_slow,
+        ):
             inv_freq = _create_inv_freq(dim, base, device)
             super().__init__(inv_freq, scaling_factor)
             self.dim = dim
@@ -818,7 +1184,9 @@ try:
             self.attn_factor = attn_factor
             self.beta_fast = beta_fast
             self.beta_slow = beta_slow
-            self.mscale = float(get_mscale(self.scaling_factor) * self.attn_factor) # Get n-d magnitude scaling corrected for interpolation
+            self.mscale = float(
+                get_mscale(self.scaling_factor) * self.attn_factor
+            )  # Get n-d magnitude scaling corrected for interpolation
 
         def _update_cos_sin_cache(self, dtype, device, seqlen):
             # Reset the tables if the sequence length has changed,
@@ -834,13 +1202,26 @@ try:
                     )
                     freqs = 1.0 / inv_freq_extrapolation
                     inv_freq_interpolation = 1.0 / (self.scaling_factor * freqs)
-                    low, high = find_correction_range(self.beta_fast, self.beta_slow, self.dim, self.base, self.max_position_embeddings)
-                    inv_freq_mask = (1 - linear_ramp_mask(low, high, self.dim // 2).float().to(device)) * self.extrapolation_factor # Get n-d rotational scaling corrected for extrapolation
-                    inv_freq = inv_freq_interpolation * (1 - inv_freq_mask) + inv_freq_extrapolation * inv_freq_mask
+                    low, high = find_correction_range(
+                        self.beta_fast,
+                        self.beta_slow,
+                        self.dim,
+                        self.base,
+                        self.max_position_embeddings,
+                    )
+                    inv_freq_mask = (
+                        1
+                        - linear_ramp_mask(low, high, self.dim // 2).float().to(device)
+                    ) * self.extrapolation_factor  # Get n-d rotational scaling corrected for extrapolation
+                    inv_freq = (
+                        inv_freq_interpolation * (1 - inv_freq_mask)
+                        + inv_freq_extrapolation * inv_freq_mask
+                    )
 
                     self.inv_freq = inv_freq
-                    self.mscale = float(get_mscale(self.scaling_factor) * self.attn_factor) # Get n-d magnitude scaling corrected for interpolation
-
+                    self.mscale = float(
+                        get_mscale(self.scaling_factor) * self.attn_factor
+                    )  # Get n-d magnitude scaling corrected for interpolation
 
                 self._seq_len_cached = seqlen
                 t = torch.arange(seqlen, device=device, dtype=self.inv_freq.dtype)
diff --git a/server/text_generation_server/utils/log.py b/server/text_generation_server/utils/log.py
new file mode 100644
index 00000000..b1456f1e
--- /dev/null
+++ b/server/text_generation_server/utils/log.py
@@ -0,0 +1,6 @@
+from functools import lru_cache
+
+
+@lru_cache(10)  # memoized on (log, msg) with maxsize=10: a pair evicted from the cache will log again
+def log_once(log, msg: str):  # `log` is a callable such as logger.info; both arguments must be hashable
+    log(msg)
diff --git a/server/text_generation_server/utils/logits_process.py b/server/text_generation_server/utils/logits_process.py
index c515e4d3..312583e3 100644
--- a/server/text_generation_server/utils/logits_process.py
+++ b/server/text_generation_server/utils/logits_process.py
@@ -2,8 +2,15 @@ import math
 import torch
 import habana_frameworks.torch.core as htcore
 
+from loguru import logger
+from typing import Dict, Union
+from text_generation_server.pb.generate_pb2 import GrammarType
+
+from outlines.fsm.fsm import RegexFSM
+from outlines.fsm.json_schema import build_regex_from_schema
 from functools import lru_cache
-from typing import Optional, List, Dict, Union
+from typing import List, Optional, DefaultDict
+import time
 
 from transformers import (
     LogitsWarper,
@@ -107,6 +114,60 @@ class HeterogeneousRepetitionPenaltyLogitsProcessor(LogitsProcessor):
         return None
 
 
+class FrequencyPenaltyLogitsProcessor(LogitsProcessor):
+    r"""
+    Frequency penalty as defined by OpenAI
+
+    Args:
+        penalty (`float`):
+            The parameter for frequency penalty. 0.0 means no penalty.
+    """
+
+    def __init__(self, penalty: float):
+        self.penalty = penalty
+
+    def __call__(
+        self, input_ids: torch.LongTensor, scores: torch.FloatTensor
+    ) -> torch.FloatTensor:
+        score = torch.gather(scores, 1, input_ids)  # current scores of the tokens listed in input_ids
+        # if score < 0 then penalty has to be multiplied to reduce the previous token probability
+        score = -torch.where(score < 0, score * self.penalty, score / self.penalty)  # NOTE(review): divides by penalty; 0.0 is documented as "no penalty" - confirm
+
+        return scores.scatter_add_(1, input_ids, score)  # in-place: `scores` itself is modified and returned
+
+
+class HeterogeneousFrequencyPenaltyLogitsProcessor(LogitsProcessor):
+    r"""
+    Frequency penalty as defined by OpenAI
+
+    Args:
+        penalty (`List[float]`):
+            The parameter for frequency penalty. 0.0 means no penalty.
+    """
+
+    def __init__(self, penalty: List[float], dtype: torch.dtype, device: torch.device):
+        self.penalty = penalty
+        self.penalty_tensor = torch.tensor(
+            penalty, dtype=dtype, device=device
+        ).unsqueeze(1)  # one penalty per row, shaped to broadcast against the gathered scores
+
+    def __call__(self, input_ids: torch.Tensor, scores: torch.Tensor) -> torch.Tensor:
+        score = torch.gather(scores, 1, input_ids)  # current scores of the tokens listed in input_ids
+        # if score < 0 then penalty has to be multiplied to reduce the previous token probability
+        score = -torch.where(  # NOTE(review): divides by the penalty; rows with penalty 0.0 divide by zero - confirm
+            score < 0, score * self.penalty_tensor, score / self.penalty_tensor
+        )
+
+        return scores.scatter_add_(1, input_ids, score)  # in-place: `scores` itself is modified and returned
+
+    def filter(self, indices):  # keep only the rows in `indices`; returns None when no kept row has a penalty
+        self.penalty = [self.penalty[i] for i in indices]
+        if any(x != 0.0 for x in self.penalty):
+            self.penalty_tensor = self.penalty_tensor[indices]
+            return self
+        return None
+
+
 class HeterogeneousTemperatureLogitsWarper:
     r"""
     [`LogitsWarper`] for temperature (exponential scaling output probability distribution).
@@ -379,3 +440,133 @@ class HeterogeneousProcessorWrapper(LogitsProcessor):
             self.processors = new_processors
             return self
         return None
+
+
+class GrammarLogitProcessor(LogitsProcessor):  # masks logits so only tokens allowed by a compiled FSM remain
+    fsm_state: DefaultDict[int, int]  # NOTE(review): declared but never assigned in this class - confirm it is used
+    fsm: RegexFSM
+
+    def __init__(self, tokenizer, device, grammar, grammar_type):
+        self.device = device
+        self.tokenizer = GrammarLogitProcessor._cached_adapt_tokenizer(tokenizer)
+        self.fsm = GrammarLogitProcessor._cached_compile_fsm(
+            grammar_type, grammar, self.tokenizer
+        )
+
+    def __call__(
+        self,
+        logits: torch.Tensor,
+        fsm_grammar_state: int,
+    ):
+        if fsm_grammar_state == -1 or self.fsm is None:  # state -1 or no FSM: logits pass through unchanged
+            return logits
+        allowed_tokens = self.fsm.allowed_token_ids(fsm_grammar_state)
+        mask = torch.full_like(logits, -math.inf)
+        mask[:, allowed_tokens] = 0  # the same allowed set is applied to every row of `logits`
+        biased_scores = logits + mask  # disallowed tokens become -inf; `logits` itself is not modified
+        return biased_scores
+
+    def advance(self, next_token_id, fsm_grammar_state):  # returns the FSM state after consuming next_token_id
+        return GrammarLogitProcessor._advance(
+            next_token_id, fsm_grammar_state, self.fsm
+        )
+
+    @staticmethod
+    def _advance(next_token_id, fsm_grammar_state, fsm):
+        if fsm_grammar_state == -1:  # state -1 is returned unchanged
+            return fsm_grammar_state
+        return fsm.next_state(fsm_grammar_state, next_token_id)
+
+    # TODO: move grammar compilation into the router
+    @staticmethod
+    @lru_cache(maxsize=32, typed=True)  # cache key is (grammar_type, schema, tokenizer); all must be hashable
+    def _cached_compile_fsm(grammar_type, schema, tokenizer):
+        start_time = time.time()
+        if grammar_type == GrammarType.GRAMMAR_TYPE_JSON:
+            schema = build_regex_from_schema(schema)  # JSON schema is first converted to a regex
+        elif grammar_type == GrammarType.GRAMMAR_TYPE_REGEX:
+            pass  # schema is already a regex just here for clarity
+        fsm = RegexFSM(schema, tokenizer)  # any other grammar_type also reaches here with `schema` unchanged
+        logger.debug(f"Compiled FSM in {time.time() - start_time:.2f}s")
+        return fsm
+
+    @staticmethod
+    @lru_cache(maxsize=32, typed=True)  # cache key is the tokenizer object, which must be hashable
+    def _cached_adapt_tokenizer(tokenizer):
+        """Adapt tokenizer to work with the FSM.
+
+        The API of Outlines tokenizers is slightly different to that of
+        `transformers`. In addition we need to handle the missing spaces to
+        Llama's tokenizer to be able to compile FSMs for this model.
+
+        """
+        start_time = time.time()
+        tokenizer.vocabulary = tokenizer.get_vocab()  # attributes are set on the passed tokenizer itself (no copy)
+        tokenizer.special_tokens = set(tokenizer.all_special_tokens)
+
+        def convert_token_to_string(token: str) -> str:
+            from transformers.file_utils import SPIECE_UNDERLINE
+
+            string = tokenizer.convert_tokens_to_string([token])
+
+            # A hack to handle missing spaces to HF's Llama tokenizers
+            if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
+                return " " + string
+
+            return string
+
+        tokenizer.convert_token_to_string = convert_token_to_string
+        logger.debug(f"Adapted tokenizer in {time.time() - start_time:.2f}s")
+        return tokenizer
+
+
+class HeterogeneousGrammarLogitProcessor(LogitsProcessor):  # per-row grammar masking: one optional FSM per batch entry
+    def __init__(self, tokenizer, device, grammars, grammar_types):
+        self.device = device
+        self.tokenizer = GrammarLogitProcessor._cached_adapt_tokenizer(tokenizer)
+        self.fsms = []  # fsms[i] is the FSM for batch row i, or None when that row has no grammar
+        for grammar, grammar_type in zip(grammars, grammar_types):
+            if len(grammar) == 0:  # an empty grammar means the row is unconstrained
+                self.fsms.append(None)
+                continue
+            fsm = GrammarLogitProcessor._cached_compile_fsm(
+                grammar_type, grammar, self.tokenizer
+            )
+            self.fsms.append(fsm)
+
+    def __call__(
+        self,
+        logits: torch.Tensor,
+        fsm_grammar_states: List[int],
+    ):
+        mask = torch.full_like(logits, -math.inf)
+        for i in range(logits.shape[0]):
+            fsm = self.fsms[i]
+            if fsm_grammar_states[i] == -1 or fsm is None:  # rows with state -1 or no FSM are left untouched
+                continue
+            allowed_tokens = fsm.allowed_token_ids(fsm_grammar_states[i])
+            mask[i, allowed_tokens] = 0
+            logits[i] += mask[i]  # in-place: the caller's `logits` tensor is modified
+        return logits
+
+    def advance_batch(self, next_token_ids, fsm_grammar_states):  # returns the list of next states, one per row
+        return [
+            GrammarLogitProcessor._advance(
+                next_token_ids[i], fsm_grammar_states[i], self.fsms[i]
+            )
+            for i in range(len(next_token_ids))
+        ]
+
+    def advance_at_index(self, next_token_id, fsm_grammar_state, index):
+        if self.fsms[index] is None:  # no grammar for this row: the state is returned unchanged
+            return fsm_grammar_state
+        return GrammarLogitProcessor._advance(
+            next_token_id, fsm_grammar_state, self.fsms[index]
+        )
+
+    def filter(self, indices):  # keep only the FSMs of the rows in `indices`, in that order
+        new_fsms = []
+        for i in indices:
+            new_fsms.append(self.fsms[i])
+        self.fsms = new_fsms
+        return self
diff --git a/server/text_generation_server/utils/paged_attention.py b/server/text_generation_server/utils/paged_attention.py
index 57a59599..18e605b0 100644
--- a/server/text_generation_server/utils/paged_attention.py
+++ b/server/text_generation_server/utils/paged_attention.py
@@ -1,29 +1,31 @@
 import torch
 
 # vllm imports
-from vllm import cache_ops
-from vllm import attention_ops
+from vllm._C import cache_ops, ops
 
 _PARTITION_SIZE = 512
 
 
-def reshape_and_cache(key: torch.Tensor, value: torch.Tensor, key_cache: torch.Tensor, value_cache: torch.Tensor,
-                      slots: torch.Tensor):
-    cache_ops.reshape_and_cache(
-        key, value, key_cache, value_cache, slots
-    )
+def reshape_and_cache(
+    key: torch.Tensor,
+    value: torch.Tensor,
+    key_cache: torch.Tensor,
+    value_cache: torch.Tensor,
+    slots: torch.Tensor,
+):
+    cache_ops.reshape_and_cache(key, value, key_cache, value_cache, slots, "auto", 1.0)
 
 
 def attention(
-        out: torch.Tensor,
-        query: torch.Tensor,
-        key_cache: torch.Tensor,
-        value_cache: torch.Tensor,
-        kv_head_mapping: torch.Tensor,
-        softmax_scale: float,
-        block_tables: torch.Tensor,
-        input_lengths: torch.Tensor,
-        max_s: int,
+    out: torch.Tensor,
+    query: torch.Tensor,
+    key_cache: torch.Tensor,
+    value_cache: torch.Tensor,
+    kv_head_mapping: torch.Tensor,
+    softmax_scale: float,
+    block_tables: torch.Tensor,
+    input_lengths: torch.Tensor,
+    max_s: int,
 ):
     # Adapted from: https://github.com/vllm-project/vllm/blob/f8a1e39fae05ca610be8d5a78be9d40f5274e5fc/vllm/model_executor/layers/attention.py
     # Copyright 2023 The vLLM team. All rights
@@ -45,17 +47,15 @@ def attention(
     # value_cache => [num_blocks, num_heads, head_size, block_size]
     block_size = value_cache.shape[3]
     num_seqs, num_heads, head_size = query.shape
-    max_num_partitions = (
-            (max_s + _PARTITION_SIZE - 1) //
-            _PARTITION_SIZE)
+    max_num_partitions = (max_s + _PARTITION_SIZE - 1) // _PARTITION_SIZE
     # NOTE(woosuk): We use a simple heuristic to decide whether to use
     # PagedAttention V1 or V2. If the number of partitions is 1, we use
     # V1 to avoid the overhead of reduction. Also, if the number of
     # sequences or heads is large, we use V1 since there is enough work
     # to parallelize.
-    use_v1 = max_num_partitions == 1 or num_seqs * num_heads > 512
+    use_v1 = max_s <= 8192 and (max_num_partitions == 1 or num_seqs * num_heads > 512)
     if use_v1:
-        attention_ops.paged_attention_v1(
+        ops.paged_attention_v1(
             out,
             query,
             key_cache,
@@ -67,6 +67,8 @@ def attention(
             block_size,
             max_s,
             None,
+            "auto",
+            1.0,
         )
     else:
         # Run PagedAttention V2.
@@ -82,7 +84,7 @@ def attention(
             device=out.device,
         )
         max_logits = torch.empty_like(exp_sums)
-        attention_ops.paged_attention_v2(
+        ops.paged_attention_v2(
             out,
             exp_sums,
             max_logits,
@@ -97,4 +99,6 @@ def attention(
             block_size,
             max_s,
             None,
+            "auto",
+            1.0,
         )
diff --git a/server/text_generation_server/utils/peft.py b/server/text_generation_server/utils/peft.py
index d37e8940..48ca264b 100644
--- a/server/text_generation_server/utils/peft.py
+++ b/server/text_generation_server/utils/peft.py
@@ -10,8 +10,7 @@ from peft import AutoPeftModelForCausalLM, AutoPeftModelForSeq2SeqLM
 def download_and_unload_peft(model_id, revision, trust_remote_code):
     torch_dtype = torch.float16
 
-    logger.info("Peft model detected.")
-    logger.info("Loading the model it might take a while without feedback")
+    logger.info("Trying to load a Peft model. It might take a while without feedback")
     try:
         model = AutoPeftModelForCausalLM.from_pretrained(
             model_id,
@@ -28,7 +27,7 @@ def download_and_unload_peft(model_id, revision, trust_remote_code):
             trust_remote_code=trust_remote_code,
             low_cpu_mem_usage=True,
         )
-    logger.info(f"Loaded.")
+    logger.info("Peft model detected.")
     logger.info(f"Merging the lora weights.")
 
     base_model_id = model.peft_config["default"].base_model_name_or_path
@@ -38,7 +37,9 @@ def download_and_unload_peft(model_id, revision, trust_remote_code):
     os.makedirs(model_id, exist_ok=True)
     cache_dir = model_id
     logger.info(f"Saving the newly created merged model to {cache_dir}")
-    tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=trust_remote_code)
+    tokenizer = AutoTokenizer.from_pretrained(
+        base_model_id, trust_remote_code=trust_remote_code
+    )
     model.save_pretrained(cache_dir, safe_serialization=True)
     model.config.save_pretrained(cache_dir)
     tokenizer.save_pretrained(cache_dir)
diff --git a/server/text_generation_server/utils/speculate.py b/server/text_generation_server/utils/speculate.py
new file mode 100644
index 00000000..a1b37a34
--- /dev/null
+++ b/server/text_generation_server/utils/speculate.py
@@ -0,0 +1,11 @@
+SPECULATE = None
+
+
+def get_speculate() -> int:
+    global SPECULATE
+    return SPECULATE
+
+
+def set_speculate(speculate: int):
+    global SPECULATE
+    SPECULATE = speculate
diff --git a/server/text_generation_server/utils/tokens.py b/server/text_generation_server/utils/tokens.py
index c50d10e3..c879e312 100644
--- a/server/text_generation_server/utils/tokens.py
+++ b/server/text_generation_server/utils/tokens.py
@@ -1,18 +1,23 @@
 # Copyright (C) 2024 Habana Labs, Ltd. an Intel Company.
 
 import re
-from typing import Callable, List, Optional, Tuple
+from typing import List, Optional, Tuple
 
+import math
 import torch
 from text_generation_server.pb import generate_pb2
-from text_generation_server.pb.generate_pb2 import FinishReason
+from text_generation_server.pb.generate_pb2 import FinishReason, GrammarType
 from text_generation_server.utils.logits_process import (
+    FrequencyPenaltyLogitsProcessor,
+    GrammarLogitProcessor,
     HeterogeneousProcessorWrapper,
     HeterogeneousRepetitionPenaltyLogitsProcessor,
+    HeterogeneousFrequencyPenaltyLogitsProcessor,
     HeterogeneousTemperatureLogitsWarper,
     HeterogeneousTopKLogitsWarper,
     HeterogeneousTopPLogitsWarper,
     HeterogeneousTypicalLogitsWarper,
+    HeterogeneousGrammarLogitProcessor,
     static_warper,
 )
 from text_generation_server.utils.watermark import WatermarkLogitsProcessor
@@ -26,17 +31,37 @@ class NextTokenChooser:
         watermark=False,
         temperature=1.0,
         repetition_penalty=1.0,
+        frequency_penalty=0.0,
         top_k=None,
         top_p=None,
         typical_p=None,
         do_sample=False,
         seed=0,
         device="cpu",
+        tokenizer: Optional[PreTrainedTokenizerBase] = None,
+        grammar: str = "",
+        grammar_type: GrammarType = GrammarType.GRAMMAR_TYPE_NONE,
+        fsm_grammar_state: int = 0,
     ):
-        self.watermark_processor = WatermarkLogitsProcessor(device=device) if watermark else None
-        self.repetition_processor = (
-            RepetitionPenaltyLogitsProcessor(penalty=repetition_penalty) if repetition_penalty else None
+        self.watermark_processor = (
+            WatermarkLogitsProcessor(device=device) if watermark else None
         )
+        self.repetition_processor = (
+            RepetitionPenaltyLogitsProcessor(penalty=repetition_penalty)
+            if repetition_penalty and repetition_penalty != 1.0
+            else None
+        )
+        self.frequency_processor = (
+            FrequencyPenaltyLogitsProcessor(penalty=frequency_penalty)
+            if frequency_penalty and frequency_penalty != 0.0
+            else None
+        )
+        self.grammar_processor = (
+            GrammarLogitProcessor(tokenizer, device, grammar, grammar_type)
+            if grammar != ""
+            else None
+        )
+        self.tokenizer = tokenizer
 
         has_warpers = (
             (temperature is not None and temperature != 1.0)
@@ -50,13 +75,20 @@ class NextTokenChooser:
             self.static_warper = None
 
         sampling = do_sample or has_warpers
+
         self.choice = Sampling(seed, device) if sampling else Greedy()
+        self.fsm_grammar_state = fsm_grammar_state
+        self.grammar = grammar
 
     def __call__(self, input_ids, scores):
         if self.watermark_processor is not None:
             scores = self.watermark_processor(input_ids, scores)
         if self.repetition_processor is not None:
             scores = self.repetition_processor(input_ids, scores)
+        if self.frequency_processor is not None:
+            scores = self.frequency_processor(input_ids, scores)
+        if self.grammar_processor is not None:
+            scores = self.grammar_processor(scores, self.fsm_grammar_state)
 
         if self.static_warper is None:
             next_logprob = torch.log_softmax(scores, -1)
@@ -67,29 +99,41 @@ class NextTokenChooser:
 
         return next_id, next_logprob
 
+    def advance_grammar(self, next_id: int):
+        if self.grammar_processor is not None:
+            self.fsm_grammar_state = self.grammar_processor.advance(
+                next_id, self.fsm_grammar_state
+            )
+        return self
+
     @classmethod
     def from_pb(
         cls,
         pb: generate_pb2.NextTokenChooserParameters,
         device: torch.device,
+        tokenizer: PreTrainedTokenizerBase,
     ) -> "NextTokenChooser":
         return NextTokenChooser(
             watermark=pb.watermark,
             temperature=pb.temperature,
             repetition_penalty=pb.repetition_penalty,
+            frequency_penalty=pb.frequency_penalty,
             top_k=pb.top_k,
             top_p=pb.top_p,
             typical_p=pb.typical_p,
             do_sample=pb.do_sample,
             seed=pb.seed,
             device=device,
+            tokenizer=tokenizer,
+            grammar=pb.grammar,
+            grammar_type=pb.grammar_type,
         )
 
 
 class StopSequenceCriteria:
     def __init__(self, stop_sequence: str):
         stop_sequence = re.escape(stop_sequence)
-        self.regex = re.compile(f".*{stop_sequence}$")
+        self.regex = re.compile(f"{stop_sequence}$")
 
     def __call__(self, output: str) -> bool:
         if self.regex.findall(output):
@@ -120,10 +164,15 @@ class StoppingCriteria:
         if not self.ignore_eos_token and last_token == self.eos_token_id:
             return True, FinishReason.FINISH_REASON_EOS_TOKEN
 
-        self.current_output += last_output
-        for stop_sequence_criteria in self.stop_sequence_criterias:
-            if stop_sequence_criteria(self.current_output):
-                return True, FinishReason.FINISH_REASON_STOP_SEQUENCE
+        if self.stop_sequence_criterias:
+            self.current_output += last_output
+            # There is no need to keep an output that is too long
+            if len(self.current_output) > 300:
+                # Slice to -200 to avoid doing it all the time
+                self.current_output = self.current_output[-200:]
+            for stop_sequence_criteria in self.stop_sequence_criterias:
+                if stop_sequence_criteria(self.current_output):
+                    return True, FinishReason.FINISH_REASON_STOP_SEQUENCE
 
         return False, None
 
@@ -142,6 +191,30 @@ class StoppingCriteria:
         )
 
 
+def create_n_gram_speculation(
+    input_ids: torch.Tensor,
+    next_ids: torch.Tensor,
+    accepted_ids: torch.Tensor,
+    speculate: int,
+    verbose: bool,
+):
+    # Very trivial approach, find first match in the string.
+    # This is much less refined than actual n-gram but seems to work
+    # relatively OK in grounded mode and is by far much faster with
+    # much less worst case complexity as everything happens on device.
+    B = accepted_ids.shape[0]
+    device = input_ids.device
+    seeds = next_ids[accepted_ids.cumsum(dim=-1) - 1]
+    indices = (input_ids == seeds.unsqueeze(-1)).max(dim=1).indices + 1
+    all_indices = indices.unsqueeze(-1).expand(B, speculate) + torch.arange(
+        speculate, device=device
+    )
+    all_indices = torch.clamp(all_indices, max=input_ids.shape[1] - 1)
+
+    speculative_ids = input_ids.gather(dim=-1, index=all_indices)
+    return speculative_ids
+
+
 class HeterogeneousNextTokenChooser:
     def __init__(
         self,
@@ -150,11 +223,16 @@ class HeterogeneousNextTokenChooser:
         watermark: List[bool],
         temperature: List[float],
         repetition_penalty: List[float],
+        frequency_penalty: List[float],
         top_k: List[int],
         top_p: List[float],
         typical_p: List[float],
         do_sample: List[bool],
         seeds: List[int],
+        tokenizer: PreTrainedTokenizerBase,
+        grammars: List[str],
+        grammar_types: List[int],
+        fsm_grammar_states: List[int],
         quantization_enabled: bool,
     ):
         warpers = []
@@ -173,14 +251,36 @@ class HeterogeneousNextTokenChooser:
         )
 
         self.repetition_processor = (
-            HeterogeneousRepetitionPenaltyLogitsProcessor(repetition_penalty, dtype, device)
+            HeterogeneousRepetitionPenaltyLogitsProcessor(
+                repetition_penalty, dtype, device
+            )
             if any([x != 1.0 for x in repetition_penalty])
             else None
         )
 
+        self.frequency_processor = (
+            HeterogeneousFrequencyPenaltyLogitsProcessor(
+                frequency_penalty, dtype, device
+            )
+            if any([x != 0.0 for x in frequency_penalty])
+            else None
+        )
+
+        self.grammar_processor = (
+            HeterogeneousGrammarLogitProcessor(
+                tokenizer, device, grammars, grammar_types
+            )
+            if any([grammar != "" for grammar in grammars])
+            else None
+        )
+
         if any([x != 1.0 for x in temperature]):
-            do_sample = [sample or x != 1.0 for x, sample in zip(temperature, do_sample)]
-            warpers.append(HeterogeneousTemperatureLogitsWarper(temperature, dtype, device))
+            do_sample = [
+                sample or x != 1.0 for x, sample in zip(temperature, do_sample)
+            ]
+            warpers.append(
+                HeterogeneousTemperatureLogitsWarper(temperature, dtype, device)
+            )
 
         if any([x != 0 for x in top_k]):
             do_sample = [sample or x != 0 for x, sample in zip(top_k, do_sample)]
@@ -205,26 +305,118 @@ class HeterogeneousNextTokenChooser:
         self.do_sample = do_sample
         self.dtype = dtype
         self.device = device
+        self.tokenizer = tokenizer
+        self.fsm_grammar_states = fsm_grammar_states
+        self.grammars = grammars
+        self.grammar_types = grammar_types
 
-    def __call__(self, input_ids: torch.Tensor, scores: torch.Tensor):
-        if self.watermark_processor is not None:
-            scores = self.watermark_processor(input_ids, scores)
-        if self.repetition_processor is not None:
-            scores = self.repetition_processor(input_ids, scores)
-
-        for warper in self.warpers:
-            scores = warper(input_ids, scores)
-
-        next_ids = self.choice(scores)
-        # ignore logprobs if we use greedy search
-        if type(self.choice) == Greedy:
-            logprobs = torch.empty_like(scores, device="cpu")
-            next_logprobs = torch.empty_like(next_ids.view(-1), device="cpu")
+    def __call__(
+        self,
+        input_ids: torch.Tensor,
+        scores: torch.Tensor,
+        speculate: int,
+        speculated_ids: Optional[torch.Tensor] = None,
+        speculative_scores: Optional[torch.Tensor] = None,
+        verbose=False,
+    ):
+        if speculated_ids is not None:
+            B = scores.shape[0] // (speculated_ids.shape[1] + 1)
+            S = speculated_ids.shape[1] + 1
+            scores = scores.view(B, S, -1)
         else:
-            logprobs = torch.log_softmax(scores, -1)
-            next_logprobs = torch.gather(logprobs, 1, next_ids.view(-1, 1)).view(-1)
+            B = scores.shape[0]
+            S = 1
+            scores = scores.view(B, S, -1)
 
-        return next_ids, next_logprobs, logprobs
+        next_ids = torch.zeros((B, S), device=scores.device, dtype=torch.long)
+
+        for j in range(S):
+            _scores = scores[:, j]
+            if self.watermark_processor is not None:
+                _scores = self.watermark_processor(input_ids, _scores)
+            if self.repetition_processor is not None:
+                _scores = self.repetition_processor(input_ids, _scores)
+            if self.frequency_processor is not None:
+                _scores = self.frequency_processor(input_ids, _scores)
+            if self.grammar_processor is not None:
+                _scores = self.grammar_processor(_scores, self.fsm_grammar_states)
+            for warper in self.warpers:
+                _scores = warper(input_ids, _scores)
+            _next_ids = self.choice(_scores)
+            scores[:, j] = _scores
+            next_ids[:, j] = _next_ids
+        next_ids = next_ids.view(B * S)
+        allscores = scores.view(B * S, -1)
+        alllogprobs = torch.log_softmax(allscores, -1)
+
+        if speculated_ids is not None:
+            accepted_ids = []
+            B = next_ids.shape[0] // (speculated_ids.shape[1] + 1)
+            S = speculated_ids.shape[1] + 1
+            indices = []
+            for i in range(B):
+                _next_ids = next_ids[i * S : (i + 1) * S]
+                _speculated_ids = speculated_ids[i]
+                validate_speculative = _next_ids[:-1] == _speculated_ids
+                index = i * S
+                accepted = 1
+                # First is always valid
+                indices.append(index)
+                for valid in validate_speculative.tolist():
+                    if valid:
+                        index += 1
+                        accepted += 1
+                        indices.append(index)
+                    else:
+                        break
+                accepted_ids.append(accepted)
+
+            accepted_ids = torch.tensor(
+                accepted_ids, device=input_ids.device, dtype=input_ids.dtype
+            )
+            next_ids = next_ids[indices]
+            logprobs = alllogprobs[indices]
+            indices = torch.arange(B, device=input_ids.device) * S
+            if speculative_scores is not None:
+                speculative_scores = speculative_scores[indices + accepted_ids - 1]
+        else:
+            accepted_ids = torch.ones_like(next_ids)
+            logprobs = alllogprobs
+
+        next_logprobs = torch.gather(logprobs, 1, next_ids.view(-1, 1)).view(-1)
+
+        if speculate > 0:
+            if speculative_scores is not None:
+                # Medusa provided some scores
+                speculative_ids = Greedy()(speculative_scores)
+            else:
+                # n-gram
+                speculative_ids = create_n_gram_speculation(
+                    input_ids, next_ids, accepted_ids, speculate, verbose
+                )
+        else:
+            speculative_ids = None
+
+        return next_ids, next_logprobs, alllogprobs, accepted_ids, speculative_ids
+
+    def advance_grammar(self, next_ids: List[int]):
+        if self.grammar_processor is not None:
+            other_new_states = self.grammar_processor.advance_batch(
+                next_ids, self.fsm_grammar_states
+            )
+            self.fsm_grammar_states = other_new_states
+        return self
+
+    def advance_grammar_single(self, grammar_state_index: int, next_id: int):
+        if self.grammar_processor is not None:
+            self.fsm_grammar_states[grammar_state_index] = (
+                self.grammar_processor.advance_at_index(
+                    next_id,
+                    self.fsm_grammar_states[grammar_state_index],
+                    grammar_state_index,
+                )
+            )
+        return self
 
     def filter(self, indices):
         if self.watermark_processor is not None:
@@ -233,6 +425,12 @@ class HeterogeneousNextTokenChooser:
         if self.repetition_processor is not None:
             self.repetition_processor = self.repetition_processor.filter(indices)
 
+        if self.frequency_processor is not None:
+            self.frequency_processor = self.frequency_processor.filter(indices)
+
+        if self.grammar_processor is not None:
+            self.grammar_processor = self.grammar_processor.filter(indices)
+
         filtered_warpers = []
         for warper in self.warpers:
             filtered_warper = warper.filter(indices)
@@ -243,6 +441,18 @@ class HeterogeneousNextTokenChooser:
         self.seeds = [self.seeds[i] for i in indices]
         self.do_sample = [self.do_sample[i] for i in indices]
 
+        new_grammars = []
+        new_fsm_grammar_states = []
+        new_grammar_types = []
+        for i in indices:
+            new_grammars.append(self.grammars[i])
+            new_fsm_grammar_states.append(self.fsm_grammar_states[i])
+            new_grammar_types.append(self.grammar_types[i])
+
+        self.grammars = new_grammars
+        self.fsm_grammar_states = new_fsm_grammar_states
+        self.grammar_types = new_grammar_types
+
         if any(self.do_sample):
             self.choice.filter(indices)
         else:
@@ -256,12 +466,15 @@ class HeterogeneousNextTokenChooser:
         pb: List[generate_pb2.NextTokenChooserParameters],
         dtype: torch.dtype,
         device: torch.device,
-        quantization_enabled: bool,
+        tokenizer: PreTrainedTokenizerBase,
+        fsm_grammar_states: Optional[List[int]] = None,
+        quantization_enabled: bool = False,
     ) -> "HeterogeneousNextTokenChooser":
         return HeterogeneousNextTokenChooser(
             watermark=[pb_.watermark for pb_ in pb],
             temperature=[pb_.temperature for pb_ in pb],
             repetition_penalty=[pb_.repetition_penalty for pb_ in pb],
+            frequency_penalty=[pb_.frequency_penalty for pb_ in pb],
             top_k=[pb_.top_k for pb_ in pb],
             top_p=[pb_.top_p for pb_ in pb],
             typical_p=[pb_.typical_p for pb_ in pb],
@@ -269,6 +482,12 @@ class HeterogeneousNextTokenChooser:
             seeds=[pb_.seed for pb_ in pb],
             device=device,
             dtype=dtype,
+            tokenizer=tokenizer,
+            grammars=[pb_.grammar for pb_ in pb],
+            grammar_types=[pb_.grammar_type for pb_ in pb],
+            fsm_grammar_states=(
+                fsm_grammar_states if fsm_grammar_states else [0] * len(pb)
+            ),
             quantization_enabled=quantization_enabled,
         )
 
@@ -335,8 +554,11 @@ class HeterogeneousSampling:
 
 
 def batch_top_tokens(
-    top_n_tokens: List[int], top_n_tokens_tensor: torch.Tensor, logprobs: torch.Tensor
-) -> Tuple[List[List[int]], List[List[float]]]:
+    top_n_tokens: List[int],
+    top_n_tokens_tensor: torch.Tensor,
+    logprobs: torch.Tensor,
+    accepted_ids: torch.Tensor,
+) -> Tuple[List[List[List[int]]], List[List[List[float]]]]:
     """Find the top n most likely tokens for a batch of generations.
 
     When multiple tokens have equal probabilities and they don't all fit, the
@@ -345,15 +567,25 @@ def batch_top_tokens(
     max_top_n = max(top_n_tokens)
     # Early exit when top_n_tokens is not used
     if max_top_n == 0:
-        return [[]] * len(top_n_tokens), [[]] * len(top_n_tokens)
+        return [[[]]] * len(top_n_tokens), [[[]]] * len(top_n_tokens)
 
+    batch_size = accepted_ids.shape[0]
+    speculate_size = logprobs.shape[0] // batch_size
+    top_n_tokens_tensor = top_n_tokens_tensor.repeat_interleave(speculate_size)
     # Ensure top_n doesn't exceed vocab size
-    top_n_tokens = [min(tok, logprobs.size(-1)) for tok in top_n_tokens]
+    top_n_tokens = [
+        min(tok, logprobs.size(-1))
+        for tok in top_n_tokens
+        for _ in range(speculate_size)
+    ]
 
     # Parallel kthvalue adapted from https://discuss.pytorch.org/t/how-to-efficiently-get-the-k-th-largest-values-in-parallel/160529/2
     # Sorted topk is faster than torch.sort() since we only need a small subset
-    sorted_top_k = torch.topk(logprobs, k=max_top_n, dim=1, sorted=True).values
-    nth_highest = torch.gather(sorted_top_k, 1, (top_n_tokens_tensor - 1).clip(min=0).unsqueeze(1))
+    sorted_top_k = torch.topk(logprobs, k=max_top_n, dim=-1, sorted=True).values
+
+    nth_highest = torch.gather(
+        sorted_top_k, 1, (top_n_tokens_tensor - 1).clip(min=0).unsqueeze(1)
+    )
     nth_highest[nth_highest == -float("inf")] = torch.finfo(logprobs.dtype).min
 
     # Find the new "fuzzy" top n values
@@ -367,10 +599,38 @@ def batch_top_tokens(
     top_indices = top_k.indices.tolist()
     top_values = top_k.values.tolist()
 
-    return (
-        [idxs[:n] if req_n > 0 else [] for idxs, n, req_n in zip(top_indices, top_n_ishes, top_n_tokens)],
-        [vals[:n] if req_n > 0 else [] for vals, n, req_n in zip(top_values, top_n_ishes, top_n_tokens)],
-    )
+    batch_top_token_ids = []
+    batch_top_token_logprobs = []
+    accepted_ids_list = accepted_ids.tolist()
+    for i, n_accepted_ids in enumerate(accepted_ids_list):
+        start = speculate_size * i
+        stop = speculate_size * (i + 1)
+        _top_indices = top_indices[start:stop]
+        _top_values = top_values[start:stop]
+        _top_n_ishes = top_n_ishes[start:stop]
+        _top_n_tokens = top_n_tokens[start:stop]
+
+        _top_indices = _top_indices[:n_accepted_ids]
+        _top_values = _top_values[:n_accepted_ids]
+        _top_n_ishes = _top_n_ishes[:n_accepted_ids]
+        _top_n_tokens = _top_n_tokens[:n_accepted_ids]
+
+        row_top_token_ids = []
+        row_top_token_logprobs = []
+
+        for idxs, vals, n, req_n in zip(
+            _top_indices, _top_values, _top_n_ishes, _top_n_tokens
+        ):
+            indices = idxs[:n] if req_n > 0 else []
+            values = vals[:n] if req_n > 0 else []
+
+            row_top_token_ids.append(indices)
+            row_top_token_logprobs.append(values)
+
+        batch_top_token_ids.append(row_top_token_ids)
+        batch_top_token_logprobs.append(row_top_token_logprobs)
+
+    return batch_top_token_ids, batch_top_token_logprobs
 
 
 def make_tokenizer_optional(tokenizer):
@@ -398,8 +658,8 @@ def make_tokenizer_optional(tokenizer):
                           for inner_text in text]
             if padding == "longest":
                 max_length = max(len(tokens) for tokens in all_tokens)
-            return {"input_ids": torch.tensor([[tokenizer.pad_token_id] * (max_length - len(tokens)) + tokens for tokens in all_tokens], dtype=torch.int32),
-                    "attention_mask": torch.tensor([[0] * (max_length - len(tokens)) + [1] * len(tokens) for tokens in all_tokens], dtype=torch.int32)}
+            return {"input_ids": torch.tensor([[tokenizer.pad_token_id] * (max_length - len(tokens)) + tokens for tokens in all_tokens]),
+                    "attention_mask": torch.tensor([[0] * (max_length - len(tokens)) + [1] * len(tokens) for tokens in all_tokens])}
 
         def decode(
             self,
diff --git a/server/text_generation_server/utils/weights.py b/server/text_generation_server/utils/weights.py
index f3344988..d0614346 100644
--- a/server/text_generation_server/utils/weights.py
+++ b/server/text_generation_server/utils/weights.py
@@ -6,6 +6,7 @@ import torch
 from loguru import logger
 from huggingface_hub import hf_hub_download
 import json
+from text_generation_server.utils.log import log_once
 
 
 class Weights:
@@ -16,7 +17,7 @@ class Weights:
         dtype,
         process_group,
         aliases: Optional[Dict[str, List[str]]] = None,
-        prefix: Optional[str] = None
+        prefix: Optional[str] = None,
     ):
         routing = {}
         for filename in filenames:
@@ -45,7 +46,6 @@ class Weights:
         return self._handles[filename]
 
     def get_filename(self, tensor_name: str) -> (str, str):
-
         names = [tensor_name]
         if self.prefix is not None:
             prefixed = f"{self.prefix}.{tensor_name}"
@@ -91,7 +91,7 @@ class Weights:
         rank = self.process_group.rank()
 
         size = slice_.get_shape()[dim]
-        block_size = size // world_size
+        block_size = (size + world_size - 1) // world_size
         start = rank * block_size
         stop = (rank + 1) * block_size
 
@@ -153,15 +153,30 @@ class Weights:
                     f"Cannot load `{quantize}` weight, make sure the model is already quantized."
                 )
 
+            bits, groupsize, _, quant_method = self._get_gptq_params()
+
             qzeros = self._get_qweight(f"{prefix}.qzeros")
             scales = self._get_qweight(f"{prefix}.scales")
             scales = scales.to(dtype=self.dtype)
-            if quantize == "gptq":
+
+            if quantize == "gptq" and quant_method == "gptq":
                 g_idx = self.get_tensor(f"{prefix}.g_idx")
+            elif quantize == "gptq" and quant_method == "awq":
+                log_once(
+                    logger.info, "Converting AWQ model to Exllama/GPTQ packing format."
+                )
+                from text_generation_server.utils.awq.conversion_utils import (
+                    fast_awq_to_gptq,
+                )
+
+                qweight, qzeros = fast_awq_to_gptq(qweight, qzeros)
+                g_idx = (
+                    torch.arange(qweight.shape[0] * (32 // bits), device=qweight.device)
+                    // groupsize
+                ).to(dtype=torch.int32)
             else:
                 g_idx = None
 
-            bits, groupsize = self._get_gptq_params()
             weight = (qweight, qzeros, scales, g_idx, bits, groupsize, False)
         else:
             slice_ = self._get_slice(f"{prefix}.weight")
@@ -203,17 +218,40 @@ class Weights:
                 [self.get_sharded(f"{p}.scales", dim=1) for p in prefixes], dim=1
             )
 
-            if quantize == "gptq":
+            bits, groupsize, desc_act, quant_method = self._get_gptq_params()
+
+            from text_generation_server.utils.layers import HAS_EXLLAMA
+
+            use_exllama = (
+                bits == 4 and HAS_EXLLAMA and quantize == "gptq" and not desc_act
+            )
+
+            if quantize == "gptq" and quant_method == "gptq":
                 w = [self.get_tensor(f"{p}.g_idx") for p in prefixes]
                 for w2 in w[1:]:
                     torch.testing.assert_close(w2, w[0])
                 g_idx = w[0]
+            elif quantize == "gptq" and quant_method == "awq":
+                log_once(
+                    logger.info, "Converting AWQ model to Exllama/GPTQ packing format."
+                )
+                from text_generation_server.utils.awq.conversion_utils import (
+                    fast_awq_to_gptq,
+                )
+
+                qweight, qzeros = fast_awq_to_gptq(qweight, qzeros)
+                if use_exllama:
+                    g_idx = None
+                else:
+                    g_idx = (
+                        torch.arange(
+                            qweight.shape[0] * (32 // bits), device=qweight.device
+                        )
+                        // groupsize
+                    ).to(dtype=torch.int32)
             else:
                 g_idx = None
 
-            bits, groupsize = self._get_gptq_params()
-            from text_generation_server.utils.layers import HAS_EXLLAMA
-            use_exllama = bits==4  and HAS_EXLLAMA and quantize == "gptq"
             weight = (qweight, qzeros, scales, g_idx, bits, groupsize, use_exllama)
         else:
             w = [self.get_sharded(f"{p}.weight", dim=0) for p in prefixes]
@@ -239,13 +277,28 @@ class Weights:
     def get_multi_weights_row(self, prefix: str, quantize: str):
         if quantize == "gptq":
             use_exllama = True
-            bits, groupsize = self._get_gptq_params()
+            bits, groupsize, desc_act, quant_method = self._get_gptq_params()
 
             if bits != 4:
                 use_exllama = False
 
+            if desc_act:
+                log_once(logger.warning, "Disabling exllama because desc_act=True")
+                use_exllama = False
+
+            try:
+                qweight = self.get_sharded(f"{prefix}.qweight", dim=0)
+            except RuntimeError:
+                raise RuntimeError(
+                    "Cannot load `gptq` weight, make sure the model is already quantized, or quantize it with `text-generation-server quantize ORIGINAL_MODEL_ID NEW_MODEL_ID`"
+                )
+
+            if quant_method == "gptq":
+                g_idx = self.get_sharded(f"{prefix}.g_idx", dim=0)
+            elif quant_method == "awq":
+                g_idx = None
+
             if self.process_group.size() > 1:
-                g_idx = self.get_tensor(f"{prefix}.g_idx")
                 if g_idx is not None:
                     if (
                         not torch.equal(
@@ -261,40 +314,51 @@ class Weights:
                         # it would require to reorder input activations that are split unto several GPUs
                         use_exllama = False
 
-            try:
-                qweight = self.get_sharded(f"{prefix}.qweight", dim=0)
-            except RuntimeError:
-                raise RuntimeError(
-                    "Cannot load `gptq` weight, make sure the model is already quantized, or quantize it with `text-generation-server quantize ORIGINAL_MODEL_ID NEW_MODEL_ID`"
-                )
-
             from text_generation_server.utils.layers import HAS_EXLLAMA, CAN_EXLLAMA
 
             if use_exllama:
                 if not HAS_EXLLAMA:
                     if CAN_EXLLAMA:
-                        logger.warning(
-                            "Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True"
+                        log_once(
+                            logger.warning,
+                            "Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True",
                         )
                     use_exllama = False
                 else:
-                    logger.info(f"Using exllama kernels v{HAS_EXLLAMA}")
+                    log_once(logger.info, f"Using exllama kernels v{HAS_EXLLAMA}")
 
-            if use_exllama:
+            if use_exllama and groupsize != -1:
                 qzeros = self.get_sharded(f"{prefix}.qzeros", dim=0)
                 scales = self.get_sharded(f"{prefix}.scales", dim=0)
-                g_idx = self.get_sharded(f"{prefix}.g_idx", dim= 0)
-                g_idx = g_idx - g_idx[0]
             else:
-                # The triton kernel reorders the scales/zero points instead of the weight/activation.
-                # Thus, each rank needs the full qzeros/scales.
                 qzeros = self.get_tensor(f"{prefix}.qzeros")
                 scales = self.get_tensor(f"{prefix}.scales")
-                g_idx = self.get_sharded(f"{prefix}.g_idx", dim=0)
+
+            if use_exllama and g_idx is not None:
+                g_idx = g_idx - g_idx[0]
+
+            if quant_method == "awq":
+                log_once(
+                    logger.info, "Converting AWQ model to Exllama/GPTQ packing format."
+                )
+                from text_generation_server.utils.awq.conversion_utils import (
+                    fast_awq_to_gptq,
+                )
+
+                qweight, qzeros = fast_awq_to_gptq(qweight, qzeros)
+                if use_exllama:
+                    g_idx = None
+                else:
+                    g_idx = (
+                        torch.arange(
+                            qweight.shape[0] * (32 // bits), device=qweight.device
+                        )
+                        // groupsize
+                    ).to(dtype=torch.int32)
 
             weight = (qweight, qzeros, scales, g_idx, bits, groupsize, use_exllama)
         elif quantize == "awq":
-            bits, groupsize = self._get_gptq_params()
+            bits, groupsize, _, _ = self._get_gptq_params()
 
             try:
                 qweight = self.get_sharded(f"{prefix}.qweight", dim=0)
@@ -313,51 +377,87 @@ class Weights:
             weight = self.get_sharded(f"{prefix}.weight", dim=1)
         return weight
 
-    def _get_gptq_params(self) -> Tuple[int, int]:
+    def _get_gptq_params(self) -> Tuple[int, int, bool, str]:
+        """Return `(bits, groupsize, desc_act, quant_method)` for the checkpoint.
+
+        The `gptq_bits` / `gptq_groupsize` tensors are tried first; in that case
+        `desc_act` is False and `quant_method` is "gptq". If they cannot be read,
+        the attributes stored by `_set_gptq_params` are used, and the original
+        error is re-raised when those are missing too.
+        """
         try:
             bits = self.get_tensor("gptq_bits").item()
             groupsize = self.get_tensor("gptq_groupsize").item()
+            desc_act = False
+            quant_method = "gptq"
         except (SafetensorError, RuntimeError) as e:
             try:
                 bits = self.gptq_bits
                 groupsize = self.gptq_groupsize
+                desc_act = getattr(self, "gptq_desc_act", False)
+                quant_method = getattr(self, "quant_method", "gptq")
             except Exception:
                 raise e
 
-        return bits, groupsize
+        return bits, groupsize, desc_act, quant_method
 
-    def _set_gptq_params(self, model_id):
+    def _set_gptq_params(self, model_id, revision):
+        """Best-effort load of quantization parameters into attributes.
+
+        Tries `config.json` (`quantization_config` section), then
+        `quantize_config.json`, then `quant_config.json`, each read from the
+        local `model_id` directory when present and otherwise downloaded from
+        the hub at `revision`. Any failure falls through to the next file; if
+        all fail, no error is raised.
+        """
         filename = "config.json"
         try:
             if os.path.exists(os.path.join(model_id, filename)):
                 filename = os.path.join(model_id, filename)
             else:
-                filename = hf_hub_download(model_id, filename=filename)
+                filename = hf_hub_download(
+                    model_id, filename=filename, revision=revision
+                )
             with open(filename, "r") as f:
                 data = json.load(f)
             self.gptq_bits = data["quantization_config"]["bits"]
             self.gptq_groupsize = data["quantization_config"]["group_size"]
+            self.quant_method = data["quantization_config"]["quant_method"]
+            # desc_act is missing on some real models: default it to False
+            self.gptq_desc_act = data["quantization_config"].get("desc_act", False)
         except Exception:
             filename = "quantize_config.json"
             try:
                 if os.path.exists(os.path.join(model_id, filename)):
                     filename = os.path.join(model_id, filename)
                 else:
-                    filename = hf_hub_download(model_id, filename=filename)
+                    filename = hf_hub_download(
+                        model_id, filename=filename, revision=revision
+                    )
                 with open(filename, "r") as f:
                     data = json.load(f)
                 self.gptq_bits = data["bits"]
                 self.gptq_groupsize = data["group_size"]
+                # Missing desc_act must not skip the version check below
+                self.gptq_desc_act = data.get("desc_act", False)
+                if "version" in data and data["version"] == "GEMM":
+                    self.quant_method = "awq"
             except Exception:
                 filename = "quant_config.json"
                 try:
                     if os.path.exists(os.path.join(model_id, filename)):
                         filename = os.path.join(model_id, filename)
                     else:
-                        filename = hf_hub_download(model_id, filename=filename)
+                        filename = hf_hub_download(
+                            model_id, filename=filename, revision=revision
+                        )
                     with open(filename, "r") as f:
                         data = json.load(f)
                     self.gptq_bits = data["w_bit"]
                     self.gptq_groupsize = data["q_group_size"]
+                    # Missing desc_act must not skip the version check
+                    self.gptq_desc_act = data.get("desc_act", False)
+                    if "version" in data and data["version"] == "GEMM":
+                        self.quant_method = "awq"
                 except Exception:
                     pass
diff --git a/tgi-entrypoint.sh b/tgi-entrypoint.sh
new file mode 100755
index 00000000..ea94dcd9
--- /dev/null
+++ b/tgi-entrypoint.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Refresh the dynamic linker cache; a failure is only reported, not fatal.
+ldconfig 2>/dev/null || echo 'unable to refresh ld cache, not a big deal in most cases'
+
+# Quote "$@" so arguments containing spaces or glob characters reach the
+# launcher unchanged; exec replaces this shell with the launcher process.
+exec text-generation-launcher "$@"
diff --git a/update_doc.py b/update_doc.py
index 6206e211..6127418c 100644
--- a/update_doc.py
+++ b/update_doc.py
@@ -21,14 +21,14 @@ def main():
     block = []
     for line in lines:
         if line.startswith("  -") or line.startswith("      -"):
-            rendered_block = '\n'.join(block)
+            rendered_block = "\n".join(block)
             if header:
                 final_doc += f"## {header}\n```shell\n{rendered_block}\n```\n"
             else:
                 final_doc += f"```shell\n{rendered_block}\n```\n"
             block = []
             tokens = line.split("<")
-            if len(tokens)>1:
+            if len(tokens) > 1:
                 header = tokens[-1][:-1]
             else:
                 header = line.split("--")[-1]
@@ -36,7 +36,7 @@ def main():
 
         block.append(line)
 
-    rendered_block = '\n'.join(block)
+    rendered_block = "\n".join(block)
     final_doc += f"## {header}\n```shell\n{rendered_block}\n```\n"
     block = []