Update to Torch 2.7.0 (#3221)

* Update to Torch 2.7.0
* Try to fix typer/click issue
* Pin click to fix incompatibility with typer
* Fix some test outputs with slight deviations
* Attempt again to sync with CI
* Mamba too
* Fixup mllama

  Also switch to `unsloth/Llama-3.2-11B-Vision-Instruct` for testing from the EU :).
2025-09-08 19:04:52 +00:00 · 2025-05-15 11:48:33 +02:00 · 2025-05-15 11:48:33 +02:00 · 7e531f413d
commit 7e531f413d
parent 535ce23827
27 changed files with 880 additions and 605 deletions
--- a/Dockerfile
+++ b/Dockerfile
@ -48,7 +48,7 @@ FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS pytorch-install
 WORKDIR /usr/src/
 # NOTE: When updating PyTorch version, beware to remove `pip install nvidia-nccl-cu12==2.22.3` below in the Dockerfile. Context: https://github.com/huggingface/text-generation-inference/pull/2099
-ARG PYTORCH_VERSION=2.6
+ARG PYTORCH_VERSION=2.7
 ARG PYTHON_VERSION=3.11
 # Keep in sync with `server/pyproject.toml
--- a/flake.lock
+++ b/flake.lock
@ -978,15 +978,16 @@
        "nixpkgs": "nixpkgs_6"
      },
      "locked": {
-        "lastModified": 1743931123,
+        "lastModified": 1746795305,
-        "narHash": "sha256-MDQrbJkweLYsMYh44Gx+c1gAZOCR1fmZF1lkavAHDto=",
+        "narHash": "sha256-4fpUT4j4w0NDKF22KvG7iGmwQTBPM5SrPEqt+N3fqF0=",
        "owner": "huggingface",
        "repo": "text-generation-inference-nix",
-        "rev": "1ad3feaadfdedca90278ee7676bca15019519189",
+        "rev": "359cd25f31f0f2ad2cadfbf4e180780a7a06e3c5",
        "type": "github"
      },
      "original": {
        "owner": "huggingface",
        "ref": "torch-2.7",
        "repo": "text-generation-inference-nix",
        "type": "github"
      }
--- a/flake.nix
+++ b/flake.nix
@ -5,7 +5,7 @@
      inputs.nixpkgs.follows = "tgi-nix/nixpkgs";
    };
    nix-filter.url = "github:numtide/nix-filter";
-    tgi-nix.url = "github:huggingface/text-generation-inference-nix";
+    tgi-nix.url = "github:huggingface/text-generation-inference-nix/torch-2.7";
    nixpkgs.follows = "tgi-nix/nixpkgs";
    flake-utils.url = "github:numtide/flake-utils";
    rust-overlay = {
--- a/integration-tests/models/snapshots/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_jpg.json
+++ b/integration-tests/models/snapshots/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_jpg.json
@ -5,7 +5,7 @@
      "index": 0,
      "logprobs": null,
      "message": {
-        "content": "Okay, let's analyze the image.\n\nThe image is a solid, bright white color. There is nothing else visible within it. \n\nIt's essentially a blank white canvas or a completely white square. \n\nIs there anything specific you'd like me to do with this image, such as describe it further or imagine what it might represent?",
+        "content": "Okay, let's analyze the image.\n\nThe image is a solid, bright white color. There is nothing else visible within it. \n\nIt's essentially a blank white square or rectangle.",
        "name": null,
        "role": "assistant",
        "tool_calls": null
@ -13,14 +13,14 @@
      "usage": null
    }
  ],
-  "created": 1741965894,
+  "created": 1747062956,
  "id": "",
  "model": "google/gemma-3-4b-it",
  "object": "chat.completion",
-  "system_fingerprint": "3.2.1-dev0-native",
+  "system_fingerprint": "3.3.0-dev0-native",
  "usage": {
-    "completion_tokens": 74,
+    "completion_tokens": 42,
    "prompt_tokens": 277,
-    "total_tokens": 351
+    "total_tokens": 319
  }
 }
--- a/integration-tests/models/snapshots/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_png.json
+++ b/integration-tests/models/snapshots/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_png.json
@ -1,11 +1,11 @@
 {
  "choices": [
    {
-      "finish_reason": "length",
+      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "message": {
-        "content": "Okay, let's analyze the image. \n\nThe image is entirely white, with a very subtle, faint outline of a stylized, cartoonish figure. It appears to be a simplified depiction of a person, likely a child, with a wide-eyed expression and a small, rounded body. \n\nIt's almost like a minimalist, iconic representation. \n\nDo you want me to try and describe it in more detail, or perhaps suggest what this image might represent (e.g",
+        "content": "Okay, let's analyze the image. \n\nThe image is a very plain, solid white square. That's it! \n\nIt's essentially a blank canvas. \n\nDo you want me to describe it in more detail, or are you interested in something else regarding this image?",
        "name": null,
        "role": "assistant",
        "tool_calls": null
@ -13,14 +13,14 @@
      "usage": null
    }
  ],
-  "created": 1744396706,
+  "created": 1747062955,
  "id": "",
  "model": "google/gemma-3-4b-it",
  "object": "chat.completion",
  "system_fingerprint": "3.3.0-dev0-native",
  "usage": {
-    "completion_tokens": 100,
+    "completion_tokens": 62,
    "prompt_tokens": 277,
-    "total_tokens": 377
+    "total_tokens": 339
  }
 }
--- a/integration-tests/models/snapshots/test_flash_gemma3/test_flash_gemma3_image_base64_rgba.json
+++ b/integration-tests/models/snapshots/test_flash_gemma3/test_flash_gemma3_image_base64_rgba.json
@ -5,7 +5,7 @@
      "index": 0,
      "logprobs": null,
      "message": {
-        "content": "Okay, let's analyze the image. \n\nThe transparent image reveals a stylized depiction of **a human head**. It's a minimalist, geometric representation, showing the basic shapes of the skull, eye sockets, and head outline. \n\nIf you'd like, you can give me more details about the image or ask me to focus on a specific aspect of it.",
+        "content": "Okay, let's analyze the image. \n\nThe transparent image reveals a stylized depiction of **a human head**. It's a minimalist, geometric representation, showing the basic shapes of the skull, eye sockets, and head outline. \n\nDo you want me to describe any specific element of the image in more detail?",
        "name": null,
        "role": "assistant",
        "tool_calls": null
@ -13,14 +13,14 @@
      "usage": null
    }
  ],
-  "created": 1744396703,
+  "created": 1747062952,
  "id": "",
  "model": "google/gemma-3-4b-it",
  "object": "chat.completion",
  "system_fingerprint": "3.3.0-dev0-native",
  "usage": {
-    "completion_tokens": 78,
+    "completion_tokens": 67,
    "prompt_tokens": 277,
-    "total_tokens": 355
+    "total_tokens": 344
  }
 }
--- a/integration-tests/models/snapshots/test_flash_gemma3/test_flash_gemma3_image_cow.json
+++ b/integration-tests/models/snapshots/test_flash_gemma3/test_flash_gemma3_image_cow.json
@ -5,7 +5,7 @@
      "index": 0,
      "logprobs": null,
      "message": {
-        "content": "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a quite a humorous and unusual scene – a cow enjoying a day at the beach!",
+        "content": "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a quite a humorous and unusual scene – a cow enjoying a beach day!",
        "name": null,
        "role": "assistant",
        "tool_calls": null
@ -13,14 +13,14 @@
      "usage": null
    }
  ],
-  "created": 1744396699,
+  "created": 1747216083,
  "id": "",
  "model": "google/gemma-3-4b-it",
  "object": "chat.completion",
  "system_fingerprint": "3.3.0-dev0-native",
  "usage": {
-    "completion_tokens": 74,
+    "completion_tokens": 72,
    "prompt_tokens": 275,
-    "total_tokens": 349
+    "total_tokens": 347
  }
 }
--- a/integration-tests/models/snapshots/test_flash_gemma3/test_flash_gemma3_image_cow_dog.json
+++ b/integration-tests/models/snapshots/test_flash_gemma3/test_flash_gemma3_image_cow_dog.json
@ -5,7 +5,7 @@
      "index": 0,
      "logprobs": null,
      "message": {
-        "content": "That's a fantastic question! However, the image doesn't show a dog. It shows a **Brown Swiss cow** standing on a beach. \n\nBrown Swiss cows are known for their reddish-brown color and distinctive white markings. \n\nIf you'd like, you can send me another image and I’ll do my best to identify it!",
+        "content": "That's a fantastic question! However, the image doesn't show a dog. It shows a **Brown Swiss cow** standing on a beach. \n\nBrown Swiss cows are known for their beautiful reddish-brown coats and distinctive white markings. \n\nIf you'd like, you can send me another image, and I'll do my best to identify the animal in it!",
        "name": null,
        "role": "assistant",
        "tool_calls": null
@ -13,14 +13,14 @@
      "usage": null
    }
  ],
-  "created": 1744396697,
+  "created": 1747216080,
  "id": "",
  "model": "google/gemma-3-4b-it",
  "object": "chat.completion",
  "system_fingerprint": "3.3.0-dev0-native",
  "usage": {
-    "completion_tokens": 75,
+    "completion_tokens": 80,
    "prompt_tokens": 279,
-    "total_tokens": 354
+    "total_tokens": 359
  }
 }
--- a/integration-tests/models/snapshots/test_flash_pali_gemma2/test_flash_pali_gemma_image.json
+++ b/integration-tests/models/snapshots/test_flash_pali_gemma2/test_flash_pali_gemma_image.json
@ -8,126 +8,126 @@
    "tokens": [
      {
        "id": 108,
-        "logprob": -0.73046875,
+        "logprob": -0.48046875,
        "special": false,
        "text": "\n"
      },
      {
        "id": 30234,
-        "logprob": -2.328125,
+        "logprob": -2.21875,
        "special": false,
        "text": "Brown"
      },
      {
        "id": 108,
-        "logprob": -0.12060547,
+        "logprob": -0.119140625,
        "special": false,
        "text": "\n"
      },
      {
        "id": 3726,
-        "logprob": -1.7734375,
+        "logprob": -1.703125,
        "special": false,
        "text": "Car"
      },
      {
        "id": 108,
-        "logprob": -0.041503906,
+        "logprob": -0.0390625,
        "special": false,
        "text": "\n"
      },
      {
        "id": 2915,
-        "logprob": -1.796875,
+        "logprob": -1.8203125,
        "special": false,
        "text": "Color"
      },
      {
        "id": 108,
-        "logprob": -0.039794922,
+        "logprob": -0.035888672,
        "special": false,
        "text": "\n"
      },
      {
        "id": 19178,
-        "logprob": -1.96875,
+        "logprob": -2.015625,
        "special": false,
        "text": "Cool"
      },
      {
        "id": 108,
-        "logprob": -0.080566406,
+        "logprob": -0.08105469,
        "special": false,
        "text": "\n"
      },
      {
        "id": 40544,
-        "logprob": -2.1875,
+        "logprob": -2.09375,
        "special": false,
        "text": "Decor"
      },
      {
        "id": 108,
-        "logprob": -0.033935547,
+        "logprob": -0.038330078,
        "special": false,
        "text": "\n"
      },
      {
-        "id": 13936,
+        "id": 108,
        "logprob": -1.515625,
        "special": false,
        "text": "\n"
      },
      {
        "id": 108,
        "logprob": -1.8671875,
        "special": false,
        "text": "\n"
      },
      {
        "id": 108,
        "logprob": -1.6328125,
        "special": false,
-        "text": "Green"
+        "text": "\n"
      },
      {
        "id": 108,
-        "logprob": -0.16210938,
+        "logprob": -1.265625,
        "special": false,
        "text": "\n"
      },
      {
        "id": 955,
        "logprob": -2.015625,
        "special": false,
        "text": "..."
      },
      {
        "id": 108,
-        "logprob": -0.14746094,
+        "logprob": -1.0078125,
        "special": false,
        "text": "\n"
      },
      {
        "id": 955,
        "logprob": -0.73828125,
        "special": false,
        "text": "..."
      },
      {
        "id": 108,
-        "logprob": -0.051513672,
+        "logprob": -1.03125,
        "special": false,
        "text": "\n"
      },
      {
-        "id": 955,
+        "id": 235336,
-        "logprob": -0.34765625,
+        "logprob": -1.2109375,
        "special": false,
-        "text": "..."
+        "text": "?"
      },
      {
        "id": 108,
-        "logprob": -0.020141602,
+        "logprob": -0.29101562,
        "special": false,
        "text": "\n"
      },
      {
-        "id": 955,
+        "id": 235336,
-        "logprob": -0.11767578,
+        "logprob": -0.08935547,
        "special": false,
-        "text": "..."
+        "text": "?"
      }
    ],
    "top_tokens": null
  },
-  "generated_text": "\nBrown\nCar\nColor\nCool\nDecor\nGreen\n...\n...\n...\n..."
+  "generated_text": "\nBrown\nCar\nColor\nCool\nDecor\n\n\n\n\n\n\n?\n?"
 }
--- a/integration-tests/models/snapshots/test_idefics/test_idefics_two_images.json
+++ b/integration-tests/models/snapshots/test_idefics/test_idefics_two_images.json
@ -2,84 +2,90 @@
  "details": {
    "best_of_sequences": null,
    "finish_reason": "eos_token",
-    "generated_tokens": 12,
+    "generated_tokens": 13,
    "prefill": [],
    "seed": null,
    "tokens": [
      {
        "id": 450,
-        "logprob": -0.26342773,
+        "logprob": -0.2602539,
        "special": false,
        "text": " The"
      },
      {
        "id": 21282,
-        "logprob": -0.01838684,
+        "logprob": -0.018463135,
        "special": false,
        "text": " cow"
      },
      {
        "id": 322,
-        "logprob": -0.18041992,
+        "logprob": -0.1829834,
        "special": false,
        "text": " and"
      },
      {
        "id": 521,
-        "logprob": -0.62841797,
+        "logprob": -0.62109375,
        "special": false,
        "text": " ch"
      },
      {
        "id": 21475,
-        "logprob": -0.0037956238,
+        "logprob": -0.0037403107,
        "special": false,
        "text": "icken"
      },
      {
        "id": 526,
-        "logprob": -0.018737793,
+        "logprob": -0.018920898,
        "special": false,
        "text": " are"
      },
+      {
+        "id": 13407,
+        "logprob": -1.0732422,
+        "special": false,
+        "text": " standing"
+      },
      {
        "id": 373,
-        "logprob": -1.0820312,
+        "logprob": -0.5292969,
        "special": false,
        "text": " on"
      },
      {
        "id": 263,
-        "logprob": -0.5083008,
+        "logprob": -0.47070312,
        "special": false,
        "text": " a"
      },
      {
        "id": 25695,
-        "logprob": -0.07128906,
+        "logprob": -0.25708008,
        "special": false,
        "text": " beach"
      },
      {
        "id": 29889,
-        "logprob": -0.12573242,
+        "logprob": -0.17578125,
        "special": false,
        "text": "."
      },
      {
        "id": 32002,
-        "logprob": -0.0029792786,
+        "logprob": -0.0023422241,
        "special": true,
        "text": "<end_of_utterance>"
      },
      {
        "id": 2,
-        "logprob": -0.00024962425,
+        "logprob": -0.00030851364,
        "special": true,
        "text": "</s>"
      }
    ],
    "top_tokens": null
  },
-  "generated_text": " The cow and chicken are on a beach."
+  "generated_text": " The cow and chicken are standing on a beach."
 }
--- a/integration-tests/models/snapshots/test_mamba/test_mamba_all_params.json
+++ b/integration-tests/models/snapshots/test_mamba/test_mamba_all_params.json
@ -64,7 +64,7 @@
      },
      {
        "id": 329,
-        "logprob": -2.171875,
+        "logprob": -2.296875,
        "special": false,
        "text": " A"
      },
@ -81,19 +81,19 @@
        "text": " of"
      },
      {
-        "id": 1027,
+        "id": 253,
-        "logprob": -1.5546875,
+        "logprob": -0.86328125,
        "special": false,
-        "text": " different"
+        "text": " the"
      },
      {
        "id": 3295,
-        "logprob": -0.97265625,
+        "logprob": -0.55078125,
        "special": false,
        "text": " color"
      }
    ],
    "top_tokens": null
  },
-  "generated_text": "blue, red, yellow, \nand blue colors. A number of different color"
+  "generated_text": "blue, red, yellow, \nand blue colors. A number of the color"
 }
--- a/integration-tests/models/snapshots/test_mllama/test_mllama_load.json
+++ b/integration-tests/models/snapshots/test_mllama/test_mllama_load.json
@ -14,9 +14,9 @@
        "usage": null
      }
    ],
-    "created": 1746054921,
+    "created": 1747230173,
    "id": "",
-    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+    "model": "unsloth/Llama-3.2-11B-Vision-Instruct",
    "object": "chat.completion",
    "system_fingerprint": "3.3.0-dev0-native",
    "usage": {
@ -40,9 +40,9 @@
        "usage": null
      }
    ],
-    "created": 1746054921,
+    "created": 1747230173,
    "id": "",
-    "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+    "model": "unsloth/Llama-3.2-11B-Vision-Instruct",
    "object": "chat.completion",
    "system_fingerprint": "3.3.0-dev0-native",
    "usage": {
--- a/integration-tests/models/snapshots/test_mllama/test_mllama_simpl.json
+++ b/integration-tests/models/snapshots/test_mllama/test_mllama_simpl.json
@ -5,7 +5,7 @@
      "index": 0,
      "logprobs": null,
      "message": {
-        "content": "A chicken stands on a pile of money, looking",
+        "content": "A chicken sits on a pile of money, looking",
        "name": null,
        "role": "assistant",
        "tool_calls": null
@ -13,9 +13,9 @@
      "usage": null
    }
  ],
-  "created": 1746054919,
+  "created": 1747230171,
  "id": "",
-  "model": "meta-llama/Llama-3.2-11B-Vision-Instruct",
+  "model": "unsloth/Llama-3.2-11B-Vision-Instruct",
  "object": "chat.completion",
  "system_fingerprint": "3.3.0-dev0-native",
  "usage": {
--- a/integration-tests/models/snapshots/test_smolvlm/test_flash_smolvlm_next_simple_url.json
+++ b/integration-tests/models/snapshots/test_smolvlm/test_flash_smolvlm_next_simple_url.json
@ -8,49 +8,49 @@
    "tokens": [
      {
        "id": 330,
-        "logprob": -0.118652344,
+        "logprob": -0.107421875,
        "special": false,
        "text": " A"
      },
      {
        "id": 11426,
-        "logprob": -0.28320312,
+        "logprob": -0.30078125,
        "special": false,
        "text": " bee"
      },
      {
        "id": 335,
-        "logprob": -0.95703125,
+        "logprob": -0.9609375,
        "special": false,
        "text": " on"
      },
      {
        "id": 253,
-        "logprob": -0.06982422,
+        "logprob": -0.0703125,
        "special": false,
        "text": " a"
      },
      {
        "id": 11986,
-        "logprob": -0.49414062,
+        "logprob": -0.5,
        "special": false,
        "text": " pink"
      },
      {
        "id": 8525,
-        "logprob": -0.07763672,
+        "logprob": -0.09716797,
        "special": false,
        "text": " flower"
      },
      {
        "id": 30,
-        "logprob": -1.0703125,
+        "logprob": -1.078125,
        "special": false,
        "text": "."
      },
      {
        "id": 49154,
-        "logprob": -0.092285156,
+        "logprob": -0.110839844,
        "special": true,
        "text": "<end_of_utterance>"
      }
--- a/integration-tests/models/test_flash_gemma3.py
+++ b/integration-tests/models/test_flash_gemma3.py
@ -53,9 +53,9 @@ async def test_flash_gemma3_image_cow_dog(flash_gemma3, response_snapshot):
    assert (
        response.choices[0].message.content
-        == "That's a fantastic question! However, the image doesn't show a dog. It shows a **Brown Swiss cow** standing on a beach. \n\nBrown Swiss cows are known for their reddish-brown color and distinctive white markings. \n\nIf you'd like, you can send me another image and I’ll do my best to identify it!"
+        == "That's a fantastic question! However, the image doesn't show a dog. It shows a **Brown Swiss cow** standing on a beach. \n\nBrown Swiss cows are known for their beautiful reddish-brown coats and distinctive white markings. \n\nIf you'd like, you can send me another image, and I'll do my best to identify the animal in it!"
    )
-    assert response.usage["completion_tokens"] == 75
+    assert response.usage["completion_tokens"] == 80
    assert response == response_snapshot
@ -76,9 +76,9 @@ async def test_flash_gemma3_image_cow(flash_gemma3, response_snapshot):
    )
    assert (
        response.choices[0].message.content
-        == "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a quite a humorous and unusual scene – a cow enjoying a day at the beach!"
+        == "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a quite a humorous and unusual scene – a cow enjoying a beach day!"
    )
-    assert response.usage["completion_tokens"] == 74
+    assert response.usage["completion_tokens"] == 72
    assert response == response_snapshot
--- a/integration-tests/models/test_flash_pali_gemma2.py
+++ b/integration-tests/models/test_flash_pali_gemma2.py
@ -22,8 +22,7 @@ async def test_flash_pali_gemma_image(flash_pali_gemma, response_snapshot):
        max_new_tokens=20,
    )
    assert (
-        response.generated_text
+        response.generated_text == "\nBrown\nCar\nColor\nCool\nDecor\n\n\n\n\n\n\n?\n?"
-        == "\nBrown\nCar\nColor\nCool\nDecor\nGreen\n...\n...\n...\n..."
    )
    assert response == response_snapshot
--- a/integration-tests/models/test_idefics.py
+++ b/integration-tests/models/test_idefics.py
@ -39,7 +39,7 @@ async def test_idefics_two_images(idefics, response_snapshot, chicken, cow_beach
        max_new_tokens=20,
    )
    assert (
-        response.generated_text == " The cow and chicken are on a beach."
+        response.generated_text == " The cow and chicken are standing on a beach."
    ), f"{repr(response.generated_text)}"
    assert response == response_snapshot
--- a/integration-tests/models/test_mamba.py
+++ b/integration-tests/models/test_mamba.py
@ -47,7 +47,7 @@ async def test_mamba_all_params(fused_kernel_mamba, response_snapshot):
    assert response.details.generated_tokens == 10
    assert (
        response.generated_text
-        == "blue, red, yellow, \nand blue colors. A number of different color"
+        == "blue, red, yellow, \nand blue colors. A number of the color"
    )
    assert response == response_snapshot
--- a/integration-tests/models/test_mllama.py
+++ b/integration-tests/models/test_mllama.py
@ -5,7 +5,7 @@ import asyncio
 @pytest.fixture(scope="module")
 def mllama_handle(launcher):
    with launcher(
-        "meta-llama/Llama-3.2-11B-Vision-Instruct",
+        "unsloth/Llama-3.2-11B-Vision-Instruct",
        num_shard=2,
    ) as handle:
        yield handle
@ -48,7 +48,7 @@ async def test_mllama_simpl(mllama, response_snapshot):
    }
    assert (
        response.choices[0].message.content
-        == "A chicken stands on a pile of money, looking"
+        == "A chicken sits on a pile of money, looking"
    )
    assert response == response_snapshot
--- a/server/Makefile
+++ b/server/Makefile
@ -38,7 +38,7 @@ install: install-cuda
 	echo "Installed server"
 install-cuda: install-server install-flash-attention-v2-cuda install-flash-attention
-	uv sync --inexact --extra attention --extra bnb --extra marlin --extra moe --active
+	uv sync --inexact --extra attention --extra bnb --active
 	uv pip install nvidia-nccl-cu12==2.22.3
 	kernels download .
@ -46,6 +46,6 @@ install-rocm: install-server install-flash-attention-v2-rocm  install-vllm-rocm
 export-requirements:
 	uv pip compile pyproject.toml --extra gen -o requirements_gen.txt --python-version 3.11
-	uv pip compile pyproject.toml --extra attention --extra bnb --extra accelerate --extra compressed-tensors --extra marlin --extra moe --extra quantize --extra peft --extra outlines -o requirements_cuda.txt --python-version 3.11
+	uv pip compile pyproject.toml --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_cuda.txt --python-version 3.11
 	uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_intel.txt --python-version 3.11
 	uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_rocm.txt --python-version 3.11
--- a/server/kernels.lock
+++ b/server/kernels.lock
@ -1,270 +1,410 @@
 [
  {
    "repo_id": "kernels-community/paged-attention",
-    "sha": "331b7e63a6b592799c8bc992f681bb1ee2c865a2",
+    "sha": "1e0a9708f0fe47009a3d292226c5492474353258",
    "variants": {
      "torch25-cxx11-cu118-x86_64-linux": {
-        "hash": "sha256-8e0aa39abab82f1d21b661d35e0470a24c3ebbdda38532ded805c18037a1ad1e",
+        "hash": "sha256-99710450ce815fdd0eeab3862ed0940c37a236c4f6cd49399e0112d66c9e40cb",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx11-cu121-x86_64-linux": {
-        "hash": "sha256-b0c3aef6c4c9aac627975cb1a2bfc46a70390763c8165575b89d1651d007c38a",
+        "hash": "sha256-bf136ffb4732e141e05738606a014fde18d3aa6d4345d6223858327c00eef2d1",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx11-cu124-x86_64-linux": {
-        "hash": "sha256-960fbc8998439d779adb47fb2a37cce68c7dc075d8a49893bd487be9ca2d1389",
+        "hash": "sha256-5ff343fc4feadf36ea38032d2a014a1cd6008fe22dea26191cd397745dbaf8ae",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu118-x86_64-linux": {
-        "hash": "sha256-9d6d60c411c55aa2f9d7c681c2be96f4262d56c96f592f3d4fb35ce4f4f1e18e",
+        "hash": "sha256-5db4fd37dcc6ec49ea71eba49415758b98fc21699155632902c76a545b36c47a",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu121-x86_64-linux": {
-        "hash": "sha256-98c0a305b2cc9b7be757fab923d9aa406c686dcd0460e462926f87d051ef3d19",
+        "hash": "sha256-995ff1a0cfe569639bc1644b5d6d823ea47ad0da33fe1cf398370ee70a203eb3",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu124-x86_64-linux": {
-        "hash": "sha256-71e586416213c96ffbdeae0d077ba97bfde5b00005f2746d4cba2320cb53bf87",
+        "hash": "sha256-1a00b021ea1273acb003ebd459699287ebf3d03f949befa31ae91899fa90b9e8",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu118-x86_64-linux": {
-        "hash": "sha256-2f559312c54d558b33a4082ffc3fcf923f51da40ced19bfc8920e998ba2b71bf",
+        "hash": "sha256-91e57835ae0f6e2df38c65c9e2eb47d33b609c7c117f6a86898740ad17653dba",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu124-x86_64-linux": {
-        "hash": "sha256-6033b41a0f8a9509887c6171f0b42d9aa738490903b3fd5ea2c52703c5fb8fc3",
+        "hash": "sha256-5435890298a7eca613c805c8aee08b5a4405a1a7ad38ad3bc43bba14b26683ae",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu126-aarch64-linux": {
        "hash": "sha256-b3dffef571f4f813b727ce3b2fcb7b43ee9d2e793b594e6ccf3a694bac87280a",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu126-x86_64-linux": {
-        "hash": "sha256-3139f66a53f2bf0c314b4d309893095746bdc9c3914c904fc31adfdf553ed219",
+        "hash": "sha256-7ce5d58943f52959cc9643477e4dc211c7592628968cc53714e307092c95a769",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu118-x86_64-linux": {
-        "hash": "sha256-2173d77e384d8e2881fc38603992c09e8be7bcd9da4cafdd4f2a5ce0ce22caf4",
+        "hash": "sha256-c74c251ba84cf6ea4c0402ed6dec7dca92f46b101f299a0abb1bcab5c83d2165",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu124-x86_64-linux": {
-        "hash": "sha256-7b1aaef81e01ecce83e03c50872910680ff2953f7c6ffd3ff15e8d9497ca9239",
+        "hash": "sha256-44661e14516679bfa1788a4919c01014e9cd2402ad6231947bf7a6ca55002ecd",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu126-aarch64-linux": {
        "hash": "sha256-e28ca88f80f95eede03eae610c08f83caabe579e15d110d9e070e46b6435770f",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu126-x86_64-linux": {
-        "hash": "sha256-818b160a88b12b8e871099e40f76aa436ee828e2e060ecc35502dbe34a6ebd3b",
+        "hash": "sha256-05eb63f56b6b665d0e25919a8f429c8c3b2e0e3fc55725885d0e68e9011ca283",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu118-x86_64-linux": {
        "hash": "sha256-ef0c14844fd8df0ce765b85497c90ce1091b4a780642d86bf206799ba9d3c94a",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu126-aarch64-linux": {
        "hash": "sha256-ab151aea475c6880ed15e8f9232bf8720f7f0f2b96acdac65a5bcb7e5ab727b1",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu126-x86_64-linux": {
        "hash": "sha256-08345dd704dcea727b9c2c109664f1602f97908fed84522edb817d95eb859f74",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu128-aarch64-linux": {
        "hash": "sha256-c2419e4057e26bd90360dacd30f1b51eea1fde2efed9bd4c7db034ffc2962a5a",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu128-x86_64-linux": {
        "hash": "sha256-a85fa6b43d438380c9d064769d8dd509ebf5206327a326082c0c249c0704ca46",
        "hash_type": "git_lfs_concat"
      }
    }
  },
  {
    "repo_id": "kernels-community/moe",
-    "sha": "605a216f507b9a97b543140dee8937a4622069a8",
+    "sha": "e3efab933893cde20c5417ba185fa3b7cc811b24",
    "variants": {
      "torch25-cxx11-cu118-x86_64-linux": {
-        "hash": "sha256-855d92f02be3bfba0758161fa1266159d76c172e7c5d43d30816d22cfba76074",
+        "hash": "sha256-719817bc2320f52d510e4a62bceef41a0ba8c58ea0e67d844db4225add3c5783",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx11-cu121-x86_64-linux": {
-        "hash": "sha256-e6e780230477bbbc26fc40cc7fcff50298155998af4fc77a026c9f815ec984b1",
+        "hash": "sha256-1b5973b5d9376e377ff223aed71936cc25f19367c8db7fcd9aa70960c15de290",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx11-cu124-x86_64-linux": {
-        "hash": "sha256-52c1fb337033c4d1d7a279c5cb28aebbc7389976f21dc5803aeb16b2f7aeb94c",
+        "hash": "sha256-69e1e5603c01227c3e2cbd67c09dd39fa7c0d4ecf3f736a2eb07227f6bb8935b",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu118-x86_64-linux": {
-        "hash": "sha256-1fb654e8d02dda2a2382d1fb3a3ca9738d292eea674b30b80030cdcdfb6a0035",
+        "hash": "sha256-91626ab4046b04e1a0967cc5c8a60a248e611b413e1cace3e4bdb0fc3a68a0e4",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu121-x86_64-linux": {
-        "hash": "sha256-0cf235f1de85d4ce7490c79aa64220f608f886f313b676d91c331a6a2fd67bbb",
+        "hash": "sha256-84dd628239aa3043bc048c51f513faf55042ccc3d372002bbc231b0aa6d6689f",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu124-x86_64-linux": {
-        "hash": "sha256-3def11fee9bf1ea9b1579206fd5f5ecbcaad47ac478e2c3aa7b2c9c7fd5db934",
+        "hash": "sha256-ffb9743f69aae59fba1cfed1fc9e2e0f90a9000121c2db5880f0e055a714931a",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu118-x86_64-linux": {
-        "hash": "sha256-3a49ee03f675190a79c7c74a45cc403d491eceb63a943f47d52064a11ca6ef6f",
+        "hash": "sha256-30560d5c091a9be1914fc8bf42d86767cfb07f1b7335f1ee88797e42f31e7856",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu124-x86_64-linux": {
-        "hash": "sha256-dbf20cb11db7d53e11147ab13641eefaa235f9ac2fde1beaf8f56f850c11bd54",
+        "hash": "sha256-6e2afd532fdc9cee8f532097a80e4c2139f47df8005c43c5cdac42204d6217e1",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu126-aarch64-linux": {
        "hash": "sha256-93d46cc7701358cd5a4e5ae3fafde8120fdb765149b9a9224f52a802b7d48cf1",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu126-x86_64-linux": {
-        "hash": "sha256-8a07232ab316e8eab74747662cb7b86aac03f44ff158f275768fd59390df2525",
+        "hash": "sha256-e57c961ea9c1a411c5b348986e359b1e6f1102fa09cfaa82d20f96d09528098a",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu118-x86_64-linux": {
-        "hash": "sha256-cdd46301af997eeace5e016d8590969981b3a3f8647828d04baa5fa10c696746",
+        "hash": "sha256-946b982082c008220a667f44e4308c17933e0d4785cad72ececa35273275f09c",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu124-x86_64-linux": {
-        "hash": "sha256-c865188e9d2c17f3358f3d343fb40340232457572744bf85efd6b20af545d5f3",
+        "hash": "sha256-227be46b6cc468fadc237bb616d14e4747ad122bc0a2cd5bbef1a2b89a63d5bf",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu126-aarch64-linux": {
        "hash": "sha256-d0dc0c8f34608f7c735e804c606dff029708349e68d5b9d9df7541b2498c1e8e",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu126-x86_64-linux": {
-        "hash": "sha256-2a8b09f3272ea80491e78a39ff886680471d99f7ba571581809adfe918013898",
+        "hash": "sha256-91b3df206bd4418e42d08608fdf652d65612342efc8f67958a66d68038179567",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu118-x86_64-linux": {
        "hash": "sha256-4b0f4536cd8f24ef00f06e00dfa0123c03dada7de3394a6274ec5cfa3bbf31f6",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu126-aarch64-linux": {
        "hash": "sha256-4c8468437ac977116f46be9a6871b0887f762ba44d3aea3c3ce2eb41637fb626",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu126-x86_64-linux": {
        "hash": "sha256-9a0d84b8636a897e4a5abd243f48a71d7d470c2f8e28df6a6874a9d981105c0f",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu128-aarch64-linux": {
        "hash": "sha256-11e6c4ce82a25d17664b4100af419f974fc312ac283195129c91519dac4d5812",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu128-x86_64-linux": {
        "hash": "sha256-c49a6eda12752adf78690a5e985a55d3b85d6724be5d18db51cd03d5fc75cc9b",
        "hash_type": "git_lfs_concat"
      }
    }
  },
  {
    "repo_id": "kernels-community/quantization",
-    "sha": "95272c71ca71b1ddbacb0105dab54e5d5240bd5c",
+    "sha": "6470f9b005797e00279eb9103463dfe0f8b7da00",
    "variants": {
      "torch25-cxx11-cu118-x86_64-linux": {
-        "hash": "sha256-2d0a274cf0117bf7880d6040adafa1b70fe8bff3a00ef2834ed5435a6b525a49",
+        "hash": "sha256-f52c9b1a7cd98fb389c6d2a0b22a293cb36eb96af3a624f5aec761735861c96d",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx11-cu121-x86_64-linux": {
-        "hash": "sha256-116458beac63ea5eeb1e7fba7edc68d160cd8ac28f55b926d79035551aac7d5f",
+        "hash": "sha256-e5f0da343363a562ce52f147a9534cd54a3efa90e70671f606cc2516f02a3876",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx11-cu124-x86_64-linux": {
-        "hash": "sha256-cace644c6fb04470384796c18987135cb051dfb90a14e902c51a3786fc07c599",
+        "hash": "sha256-caad9300c155faf79c26426f10951ba75f931a05e741a5b39a24b064daabc040",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu118-x86_64-linux": {
-        "hash": "sha256-104c6961cd3e1a74efdf14ea2172acc6647846852fccafe3698a27a6cf37941d",
+        "hash": "sha256-4fc87893de14a29ba4b55f5026ea05ec5901c0b52abd5ebae681ea0b791e858c",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu121-x86_64-linux": {
-        "hash": "sha256-cdc95b41aa91a803f11f8cd53001895c2b69550b5af2fb278d6f124381229d0b",
+        "hash": "sha256-72c975ea63fc524a38fcee5b2dbdb566eff0a0ea546ee5756441d04908e4e896",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu124-x86_64-linux": {
-        "hash": "sha256-d5388469cb6074f196f20b1e1e4805bb3c967a8147b31ca2c0461aa87b50604e",
+        "hash": "sha256-28c5510e3b07eae2b3846b880f6111da65df024e1f24f81077d187a97c015364",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu118-x86_64-linux": {
-        "hash": "sha256-70c4bb3792c4c3207d4963173d8d0ef3b2bda677151aef140662dd87bfa1b69f",
+        "hash": "sha256-8444cf77686578a6b0f7e2fd29bf2783ba120ebf7df41573f61d2521fd0acc10",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu124-x86_64-linux": {
-        "hash": "sha256-bcacbb2232f49345f27e07fa821b48a7e3df643c01af37281fcafc74c471f682",
+        "hash": "sha256-6ea8e00625b5fe799fbe407e7de0fc08228cac26f9bbed2d70a6500026fe3bab",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu126-aarch64-linux": {
        "hash": "sha256-0b8b8afbdaf9aa533895cb9e884e3ad3e9a34d483f05a1bbde1b8902f9dbeb0f",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu126-x86_64-linux": {
-        "hash": "sha256-344d20964f7eb133e5ec6fda976fa5ee62807b739a4361f236aca5ae53beb9ac",
+        "hash": "sha256-e115e855d7ca4b97787f04c88e128432256c6b43d4823fb8889ab9985dc4cf36",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu118-x86_64-linux": {
-        "hash": "sha256-dfaec226550254fbce1a5c7e2f547e85700958a1a4087e1c873d22e6f71a5ceb",
+        "hash": "sha256-509f08c48a05584cc85c058607277fcbe3193e6cc61846dd2416d39e27c1d68e",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu124-x86_64-linux": {
-        "hash": "sha256-0abe6460d0a2202b0086e3663092595e5b93b9a9cbb85c10034180cc9bfebc6e",
+        "hash": "sha256-a10236bffd435296c736ae2762ab0836da2421297e46b377368a17b39d70c27b",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu126-aarch64-linux": {
        "hash": "sha256-ca2cb56f3eea4c399a61e21ba9b577d718b250aa60a13f42f01019ddd5cd8b0c",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu126-x86_64-linux": {
-        "hash": "sha256-68e156f94c3c0c9523773b62eaeced93766e0d9ee67d8191fb9570fb5af30d5b",
+        "hash": "sha256-8fcd62d8243a30b63a03751cc0c15d24f6e00e43eae79f7281627f24e078bf9a",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu118-x86_64-linux": {
        "hash": "sha256-60f5807ee3da937c57c1b6080c30632305aa4875ed5a52bf4e81968770b61b13",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu126-aarch64-linux": {
        "hash": "sha256-64298b1713dc1d950915dc6569a06e2f541de3ed80aa5b32084246c1fdc7a958",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu126-x86_64-linux": {
        "hash": "sha256-d9e219890dc28e8582ef21d6f81f2ebc361de218a86b742be63bc4714f102e5e",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu128-aarch64-linux": {
        "hash": "sha256-d72549f51aefcf020bc74262bbbccb78094638c5ab9adc8667873d247c1cce86",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu128-x86_64-linux": {
        "hash": "sha256-d31ac5f87d7c7f62c63c72946479193aed467c9417c0acead5137e0e1fa968f8",
        "hash_type": "git_lfs_concat"
      }
    }
  },
  {
    "repo_id": "kernels-community/quantization-eetq",
-    "sha": "a80ce846d6270ddddeee109523ed947f594f246b",
+    "sha": "1aa83b1261b0c4cad890184a4d689e6330a110b5",
    "variants": {
      "torch25-cxx11-cu118-x86_64-linux": {
-        "hash": "sha256-e06beb00799b1e656583eb0496f09fc0bf1b26f75e9864a2fe19ebd5b62c3671",
+        "hash": "sha256-de257728ec38f48220d6c90b2fd960fed1f4c963e7cd6c204abfcf8607aedc20",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx11-cu121-x86_64-linux": {
-        "hash": "sha256-c128d3ef6558cfedf045c4a713891792708851b7f6f027de835d9083cb3b297d",
+        "hash": "sha256-9027918cf6e52591f97b2c621355e12d9adf0dfe833a763219813bfecd1ad1a3",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx11-cu124-x86_64-linux": {
-        "hash": "sha256-c7e2e14fc114788634b34a4f670f7bf4d27321e5ed40ff446f5a25eef70222c7",
+        "hash": "sha256-15cd0a56311897b27ee50617491cf69e698053a9f9af7bd37937cbca8da9db13",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu118-x86_64-linux": {
-        "hash": "sha256-58dad53cfbf1315af464f9d8ba7be9012089c839d4f06a8d2cf8ce0deaf5949a",
+        "hash": "sha256-ca35ccbb193c795587f4a0ea072fda6f0a0ac7f745f7a68e35c35012098f0a57",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu121-x86_64-linux": {
-        "hash": "sha256-6519af49c0f689744a7b49497ad2bea1524b69e4095446087d7ab622b898aa30",
+        "hash": "sha256-e7b12bd79163ee0f520b4a399f69c29e4a692667edf27f7d100f053434d8840c",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu124-x86_64-linux": {
-        "hash": "sha256-94e0731b58a9ba0e5e2f37b100c8d987c80b5d349008ef625917d020b6c52d25",
+        "hash": "sha256-f08e850e856faa42c992188affa898a9b5a7be9d64980c4193871b0ad999da78",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu118-x86_64-linux": {
-        "hash": "sha256-e5b04475538f49d7b4ffded080e4c9c86a658abc12667e3838ebcc410ab1eef4",
+        "hash": "sha256-9596f1c7cdbc7adf75898d18f370dc33ce0dfab2559301244411f5f4c4e581d4",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu124-x86_64-linux": {
-        "hash": "sha256-783c02db737a6ec9958b3090f164b87888d3b26e30a4fb6e1cd0c1a635753fab",
+        "hash": "sha256-90002710f9e59d12bff260ce288c2b2b954f988f94ef920c8384c97946b7782b",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu126-aarch64-linux": {
        "hash": "sha256-d230dd53423cf29387350d2e28cc691785135613408edb73c79f5d965dbb30e5",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu126-x86_64-linux": {
-        "hash": "sha256-a3d81f82f9cfe9d8a6d46758758b3a1b3055d902f41917b4ef2976373db843d6",
+        "hash": "sha256-fb95eb2faee971ebc0ede12678816c7796b64c723e4fd787aea97397f1c7f5cd",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu118-x86_64-linux": {
-        "hash": "sha256-f1de67e17944a9816f778c72ae73bbbc90d795cb4885c2f9ee5e0b9a3c57583b",
+        "hash": "sha256-027930f857347a4f1524fa37244c41c53ffb8c1ebd4eeb72fa32eea4a28b8787",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu124-x86_64-linux": {
-        "hash": "sha256-789b50d767a5121a7e5a52eaf0c8e897bf1787f049ca08faffb220e5053a5f10",
+        "hash": "sha256-59ee042d58d57100c415f491a3db905671e094707f786f5f7e3260d5b827ad6a",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu126-aarch64-linux": {
        "hash": "sha256-1f9d739bd8198c330b1f2893e0301740c54fa95272233fadb7a95c9b53a70383",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu126-x86_64-linux": {
-        "hash": "sha256-7c7fe57fea7b9be253085d506f01b2487b2306f22bdffe1de44397fc9f8a3613",
+        "hash": "sha256-f56c5ea702982b9f75dedeb3a8998550b1b38bcacd77590926234e221fcc571f",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu118-x86_64-linux": {
        "hash": "sha256-9c6f2b7fea5327abee2920da86dd57878d5f35aacacc886875050649073d1565",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu126-aarch64-linux": {
        "hash": "sha256-fba9bd51e4aa5515ed81193743512dec2129f38555a16a54710e650a717259a8",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu126-x86_64-linux": {
        "hash": "sha256-990b615c4b5d2f96874e7f88767681544d84771f3a11443cf0c994759f5e5f75",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu128-aarch64-linux": {
        "hash": "sha256-6ad809543e1099f91b022f1393fe9a4527957b854cdfe6c8f4a0632c5497cb9d",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu128-x86_64-linux": {
        "hash": "sha256-90aaa73d93db015c693a4089f2574c2ec2d4943bcee5c9b0ede2834a2c72c370",
        "hash_type": "git_lfs_concat"
      }
    }
  },
  {
    "repo_id": "kernels-community/rotary",
-    "sha": "4db658e027ec752840bb3f557ee076413b8db03f",
+    "sha": "804a326b61f181778b5eb4ebe27aecdb8fbcd845",
    "variants": {
      "torch25-cxx11-cu118-x86_64-linux": {
-        "hash": "sha256-907df2035267a65793985bb7f69fb2a975955fb08c2bbc78c58def43d02801da",
+        "hash": "sha256-198c67cc7330535da671086c3b6a0dd6189015381f25b409704b51224b25ae3c",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx11-cu121-x86_64-linux": {
-        "hash": "sha256-b614735ae61ee2c1825a3c823fa0cdd3aa07d0bb3f4106001b9e1a557c0ca9b9",
+        "hash": "sha256-c2e8233d79dd36fc778502c0d44e7399907c2ef064981c7d122fb0652c71eca5",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx11-cu124-x86_64-linux": {
-        "hash": "sha256-f2e98ec72faaebc1cae25f83ccdbb151868b6902fb5a0623e09d700a514c2a7e",
+        "hash": "sha256-452040cd5c335a3985da635a76db60a6fc0d9f8b1050fdf29f837d42ee2742ea",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu118-x86_64-linux": {
-        "hash": "sha256-421214c5a576fac2e0b7998395dccd7f66010f65a6fc647ce06b106ea91105d2",
+        "hash": "sha256-b627ad5946713c8893f2847eb28f87203f3caaa84f2f35bb9f7b54ea9c3c8a5d",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu121-x86_64-linux": {
-        "hash": "sha256-9d1c464cf7f391975afa48f2254a639f41582155ad1b50c25bb122418ce8db58",
+        "hash": "sha256-30311ae1858e29754a4c69e081466e78202ffe8522d08afa46f06350f54cfcd1",
        "hash_type": "git_lfs_concat"
      },
      "torch25-cxx98-cu124-x86_64-linux": {
-        "hash": "sha256-82f8012d78304efaa7318f106907630294d10c8b5c9f56923c71df0b03e09f14",
+        "hash": "sha256-f988c59f5ac640c657f51c7a463f7bcc5ff789109275d8b14f524ad300f9ca55",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu118-x86_64-linux": {
-        "hash": "sha256-a3247919dcc392efc7e54725dfbce9ee8a796fe4ee53d113048b313de074d3da",
+        "hash": "sha256-58998893b9992e3ede276388e09c1c31da0b6175d68cf37bcb75bd6f69dba240",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu124-x86_64-linux": {
-        "hash": "sha256-a21c9734d15946f4cc967d0555d45d7effc6624990c6889fc49162af744fbbe9",
+        "hash": "sha256-2fdc356b7a5ce2f090dead00253180a750ec9ff72c0afc5f3f07c96e2e603916",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu126-aarch64-linux": {
        "hash": "sha256-d82cd995be25b4b88b0a4086269dcdeb400d0720141fbbfa47bf88cd639ae7e1",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx11-cu126-x86_64-linux": {
-        "hash": "sha256-01cdda160425b29db0d9bb084874ade4ac081735f9717f272aaefe5bcb379ae1",
+        "hash": "sha256-a6cd702f278dcbd94f8412d51f79a2664844217b7344bdd24353760c72a789d5",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu118-x86_64-linux": {
-        "hash": "sha256-17be5b770418ad47101c49d8945b5aa32af9eb5a840bdffb0514d0e264edd860",
+        "hash": "sha256-c759c2e38a17ea61446afb881cfa2a152d82350e6d38efecbec8ebe1e27cf81f",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu124-x86_64-linux": {
-        "hash": "sha256-3cd4b9f63cc903e01325b7e5b204e40fc6600c0685f2e19e6f1fa604a599d82d",
+        "hash": "sha256-d81512fa75acbe8a124b9890bb041fdd1e447794ee210bbb5d01343bd5033eec",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu126-aarch64-linux": {
        "hash": "sha256-a81df695a1b980f899df3c05920a04ff15a89dd28c8cef4067e4e6579669292b",
        "hash_type": "git_lfs_concat"
      },
      "torch26-cxx98-cu126-x86_64-linux": {
-        "hash": "sha256-c569f4a4f9b64792507c58d7cfa31dde1285b52125ef07cc98d9f23636af09ca",
+        "hash": "sha256-868a4b47368a251018bf8f67f3effd8685fed6b01e64725da7e653d38831b166",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu118-x86_64-linux": {
        "hash": "sha256-21ae5790dcf3936b66cd74641f815280ea648dffdc5259b7e1dba3fa5a8fc70d",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu126-aarch64-linux": {
        "hash": "sha256-93466448e31897ef7db0e84e7d6d36824661b15a9841e2476ff181e1eab155c2",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu126-x86_64-linux": {
        "hash": "sha256-e0ce52422c82c2ce966c44e61e0d65c789b36feaaeca818f88c2e746201cde9b",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu128-aarch64-linux": {
        "hash": "sha256-eb155e56df00ad7d6455f1549d072c39f14c2b7e355f729bf35cb3e62d087df9",
        "hash_type": "git_lfs_concat"
      },
      "torch27-cxx11-cu128-x86_64-linux": {
        "hash": "sha256-63b3f8fc56c940d824cdf06d3cc5b504d82c14e005c7d2ca5360e384a2b16af2",
        "hash_type": "git_lfs_concat"
      }
    }
--- a/server/pyproject.toml
+++ b/server/pyproject.toml
@ -9,6 +9,8 @@ authors = [
  {name = "Nicolas Patry", email = "nicolas@huggingface.co"},
 ]
 dependencies = [
    # Remove explicit click dependency once typer/click are compatible again.
    "click<8.2.0",
    "einops>=0.8.0",
    "grpc-interceptor>=0.15.4",
    "grpcio>=1.67.0",
@ -37,16 +39,16 @@ dependencies = [
 ]
 [[tool.uv.index]]
-name = "pytorch-cu124"
+name = "pytorch-cu128"
-url = "https://download.pytorch.org/whl/cu124"
+url = "https://download.pytorch.org/whl/cu128"
 explicit = true
 [tool.uv.sources]
 torch = [
-  { index = "pytorch-cu124", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+  { index = "pytorch-cu128", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 torchvision = [
-  { index = "pytorch-cu124", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+  { index = "pytorch-cu128", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 [build-system]
@ -92,8 +94,8 @@ gen = [
    "mypy-protobuf>=3.6.0",
 ]
 torch = [
-    "torch==2.6.0",
+    "torch==2.7.0",
-    "torchvision==0.21.0",
+    "torchvision==0.22.0",
 ]
 [tool.pytest.ini_options]
--- a/server/requirements_cuda.txt
+++ b/server/requirements_cuda.txt
@ -1,43 +1,43 @@
 # This file was autogenerated by uv via the following command:
-#    uv pip compile pyproject.toml --extra attention --extra bnb --extra accelerate --extra compressed-tensors --extra marlin --extra moe --extra quantize --extra peft --extra outlines -o requirements_cuda.txt --python-version 3.11
+#    uv pip compile pyproject.toml --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_cuda.txt --python-version 3.11
-accelerate==1.3.0
+accelerate==1.6.0
    # via
    #   text-generation-server (pyproject.toml)
    #   peft
-aiohappyeyeballs==2.4.4
+aiohappyeyeballs==2.6.1
    # via aiohttp
-aiohttp==3.11.11
+aiohttp==3.11.18
    # via
    #   datasets
    #   fsspec
 aiosignal==1.3.2
    # via aiohttp
-airportsdata==20241001
+airportsdata==20250224
    # via outlines
 annotated-types==0.7.0
    # via pydantic
-attention-kernels @ https://github.com/danieldk/attention-kernels/releases/download/v0.2.0.post2/attention_kernels-0.2.0.post2+cu123torch2.5-cp39-abi3-linux_x86_64.whl
+attrs==25.3.0
    # via text-generation-server (pyproject.toml)
 attrs==25.1.0
    # via
    #   aiohttp
    #   jsonschema
    #   referencing
-bitsandbytes==0.45.1
+bitsandbytes==0.45.5
    # via text-generation-server (pyproject.toml)
-certifi==2024.8.30
+certifi==2025.4.26
    # via requests
-charset-normalizer==3.4.0
+charset-normalizer==3.4.2
    # via requests
-click==8.1.7
+click==8.1.8
-    # via typer
+    # via
    #   text-generation-server (pyproject.toml)
    #   typer
 cloudpickle==3.1.1
    # via outlines
-compressed-tensors==0.9.1
+compressed-tensors==0.9.4
    # via text-generation-server (pyproject.toml)
 datasets==2.21.0
    # via text-generation-server (pyproject.toml)
-deprecated==1.2.14
+deprecated==1.2.18
    # via
    #   opentelemetry-api
    #   opentelemetry-exporter-otlp-proto-grpc
@ -49,15 +49,15 @@ dill==0.3.8
    #   multiprocess
 diskcache==5.6.3
    # via outlines
-einops==0.8.0
+einops==0.8.1
    # via text-generation-server (pyproject.toml)
-filelock==3.16.1
+filelock==3.18.0
    # via
    #   datasets
    #   huggingface-hub
    #   torch
    #   transformers
-frozenlist==1.5.0
+frozenlist==1.6.0
    # via
    #   aiohttp
    #   aiosignal
@ -68,30 +68,36 @@ fsspec==2024.6.1
    #   torch
 genson==1.3.0
    # via outlines
-googleapis-common-protos==1.65.0
+googleapis-common-protos==1.70.0
    # via
    #   grpcio-status
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
 grpc-interceptor==0.15.4
    # via text-generation-server (pyproject.toml)
-grpcio==1.68.0
+grpcio==1.71.0
    # via
    #   text-generation-server (pyproject.toml)
    #   grpc-interceptor
    #   grpcio-reflection
    #   grpcio-status
    #   opentelemetry-exporter-otlp-proto-grpc
-grpcio-reflection==1.68.0
+grpcio-reflection==1.71.0
    # via text-generation-server (pyproject.toml)
-grpcio-status==1.68.0
+grpcio-status==1.71.0
    # via text-generation-server (pyproject.toml)
-hf-transfer==0.1.8
+hf-transfer==0.1.9
    # via text-generation-server (pyproject.toml)
-huggingface-hub==0.28.1
+hf-xet==1.1.0
    # via
    #   text-generation-server (pyproject.toml)
    #   huggingface-hub
 huggingface-hub==0.31.1
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
    #   datasets
    #   kernels
    #   peft
    #   tokenizers
    #   transformers
@ -99,13 +105,15 @@ idna==3.10
    # via
    #   requests
    #   yarl
-importlib-metadata==7.1.0
+importlib-metadata==8.6.1
    # via opentelemetry-api
 interegular==0.3.3
    # via
    #   outlines
    #   outlines-core
-jinja2==3.1.5
+iso3166==2.1.1
    # via outlines
 jinja2==3.1.6
    # via
    #   outlines
    #   torch
@ -113,8 +121,10 @@ jsonschema==4.23.0
    # via
    #   outlines
    #   outlines-core
-jsonschema-specifications==2024.10.1
+jsonschema-specifications==2025.4.1
    # via jsonschema
 kernels==0.5.0
    # via text-generation-server (pyproject.toml)
 lark==1.2.2
    # via outlines
 loguru==0.7.3
@ -123,15 +133,11 @@ markdown-it-py==3.0.0
    # via rich
 markupsafe==3.0.2
    # via jinja2
 marlin-kernels @ https://github.com/danieldk/marlin-kernels/releases/download/v0.3.7/marlin_kernels-0.3.7+cu123torch2.5-cp311-cp311-linux_x86_64.whl
    # via text-generation-server (pyproject.toml)
 mdurl==0.1.2
    # via markdown-it-py
 moe-kernels @ https://github.com/danieldk/moe-kernels/releases/download/v0.8.2/moe_kernels-0.8.2+cu123torch2.5-cp39-abi3-linux_x86_64.whl
    # via text-generation-server (pyproject.toml)
 mpmath==1.3.0
    # via sympy
-multidict==6.1.0
+multidict==6.4.3
    # via
    #   aiohttp
    #   yarl
@ -141,7 +147,7 @@ nest-asyncio==1.6.0
    # via outlines
 networkx==3.4.2
    # via torch
-numpy==1.26.4
+numpy==2.2.5
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
@ -152,43 +158,44 @@ numpy==1.26.4
    #   peft
    #   scipy
    #   transformers
-nvidia-cublas-cu12==12.4.5.8
+nvidia-cublas-cu12==12.6.4.1
    # via
    #   nvidia-cudnn-cu12
    #   nvidia-cusolver-cu12
    #   torch
-nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-cupti-cu12==12.6.80
    # via torch
-nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.6.77
    # via torch
-nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cuda-runtime-cu12==12.6.77
    # via torch
-nvidia-cudnn-cu12==9.1.0.70
+nvidia-cudnn-cu12==9.5.1.17
    # via torch
-nvidia-cufft-cu12==11.2.1.3
+nvidia-cufft-cu12==11.3.0.4
    # via torch
-nvidia-curand-cu12==10.3.5.147
+nvidia-cufile-cu12==1.11.1.6
    # via torch
-nvidia-cusolver-cu12==11.6.1.9
+nvidia-curand-cu12==10.3.7.77
    # via torch
-nvidia-cusparse-cu12==12.3.1.170
+nvidia-cusolver-cu12==11.7.1.2
    # via torch
 nvidia-cusparse-cu12==12.5.4.2
    # via
    #   nvidia-cusolver-cu12
    #   torch
-nvidia-cusparselt-cu12==0.6.2
+nvidia-cusparselt-cu12==0.6.3
    # via torch
-nvidia-ml-py==12.570.86
+nvidia-nccl-cu12==2.26.2
    # via moe-kernels
 nvidia-nccl-cu12==2.21.5
    # via torch
-nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvjitlink-cu12==12.6.85
    # via
    #   nvidia-cufft-cu12
    #   nvidia-cusolver-cu12
    #   nvidia-cusparse-cu12
    #   torch
-nvidia-nvtx-cu12==12.4.127
+nvidia-nvtx-cu12==12.6.77
    # via torch
-opentelemetry-api==1.30.0
+opentelemetry-api==1.33.0
    # via
    #   text-generation-server (pyproject.toml)
    #   opentelemetry-exporter-otlp-proto-grpc
@ -197,86 +204,85 @@ opentelemetry-api==1.30.0
    #   opentelemetry-instrumentation-grpc
    #   opentelemetry-sdk
    #   opentelemetry-semantic-conventions
-opentelemetry-exporter-otlp==1.30.0
+opentelemetry-exporter-otlp==1.33.0
    # via text-generation-server (pyproject.toml)
-opentelemetry-exporter-otlp-proto-common==1.30.0
+opentelemetry-exporter-otlp-proto-common==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-exporter-otlp-proto-grpc==1.30.0
+opentelemetry-exporter-otlp-proto-grpc==1.33.0
    # via opentelemetry-exporter-otlp
-opentelemetry-exporter-otlp-proto-http==1.30.0
+opentelemetry-exporter-otlp-proto-http==1.33.0
    # via opentelemetry-exporter-otlp
-opentelemetry-instrumentation==0.51b0
+opentelemetry-instrumentation==0.54b0
    # via opentelemetry-instrumentation-grpc
-opentelemetry-instrumentation-grpc==0.51b0
+opentelemetry-instrumentation-grpc==0.54b0
    # via text-generation-server (pyproject.toml)
-opentelemetry-proto==1.30.0
+opentelemetry-proto==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-common
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-sdk==1.30.0
+opentelemetry-sdk==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-semantic-conventions==0.51b0
+opentelemetry-semantic-conventions==0.54b0
    # via
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-grpc
    #   opentelemetry-sdk
-outlines==0.1.14
+outlines==0.2.3
    # via text-generation-server (pyproject.toml)
 outlines-core==0.1.26
    # via outlines
-packaging==24.1
+packaging==25.0
    # via
    #   accelerate
    #   datasets
    #   huggingface-hub
    #   kernels
    #   opentelemetry-instrumentation
    #   peft
    #   transformers
 pandas==2.2.3
    # via datasets
-peft==0.14.0
+peft==0.15.2
    # via text-generation-server (pyproject.toml)
-pillow==11.1.0
+pillow==11.2.1
    # via text-generation-server (pyproject.toml)
 prometheus-client==0.21.1
    # via text-generation-server (pyproject.toml)
-propcache==0.2.1
+propcache==0.3.1
    # via
    #   aiohttp
    #   yarl
-protobuf==5.29.3
+protobuf==5.29.4
    # via
    #   text-generation-server (pyproject.toml)
    #   googleapis-common-protos
    #   grpcio-reflection
    #   grpcio-status
    #   opentelemetry-proto
-psutil==6.1.1
+psutil==7.0.0
    # via
    #   accelerate
    #   peft
 py-cpuinfo==9.0.0
    # via text-generation-server (pyproject.toml)
-pyarrow==19.0.0
+pyarrow==20.0.0
    # via datasets
-pycountry==24.6.1
+pydantic==2.11.4
    # via outlines
 pydantic==2.10.6
    # via
    #   compressed-tensors
    #   outlines
-pydantic-core==2.27.2
+pydantic-core==2.33.2
    # via pydantic
-pygments==2.18.0
+pygments==2.19.1
    # via rich
 python-dateutil==2.9.0.post0
    # via pandas
-pytz==2025.1
+pytz==2025.2
    # via pandas
 pyyaml==6.0.2
    # via
@ -290,7 +296,7 @@ referencing==0.36.2
    #   jsonschema
    #   jsonschema-specifications
    #   outlines
-regex==2024.9.11
+regex==2024.11.6
    # via transformers
 requests==2.32.3
    # via
@ -299,65 +305,62 @@ requests==2.32.3
    #   opentelemetry-exporter-otlp-proto-http
    #   outlines
    #   transformers
-rich==13.9.4
+rich==14.0.0
    # via
    #   text-generation-server (pyproject.toml)
    #   typer
-rpds-py==0.22.3
+rpds-py==0.24.0
    # via
    #   jsonschema
    #   referencing
-safetensors==0.4.5
+safetensors==0.5.3
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
    #   peft
    #   transformers
-scipy==1.13.1
+scipy==1.15.3
    # via text-generation-server (pyproject.toml)
 sentencepiece==0.2.0
    # via text-generation-server (pyproject.toml)
 setuptools==80.4.0
    # via triton
 shellingham==1.5.4
    # via typer
 six==1.17.0
    # via python-dateutil
-sympy==1.13.1
+sympy==1.14.0
    # via torch
 texttable==1.7.0
    # via text-generation-server (pyproject.toml)
-tokenizers==0.21.0
+tokenizers==0.21.1
    # via
    #   text-generation-server (pyproject.toml)
    #   transformers
-torch==2.6.0
+torch==2.7.0
    # via
    #   accelerate
    #   attention-kernels
    #   bitsandbytes
    #   compressed-tensors
    #   marlin-kernels
    #   moe-kernels
    #   outlines
    #   peft
-tqdm==4.66.5
+tqdm==4.67.1
    # via
    #   datasets
    #   huggingface-hub
    #   outlines
    #   peft
    #   transformers
-transformers==4.49
+transformers==4.51.3
    # via
    #   text-generation-server (pyproject.toml)
    #   compressed-tensors
    #   peft
-triton==3.2.0
+triton==3.3.0
-    # via
+    # via torch
-    #   moe-kernels
+typer==0.15.3
    #   torch
 typer==0.15.1
    # via text-generation-server (pyproject.toml)
-typing-extensions==4.12.2
+typing-extensions==4.13.2
    # via
    #   huggingface-hub
    #   opentelemetry-sdk
@ -367,18 +370,21 @@ typing-extensions==4.12.2
    #   referencing
    #   torch
    #   typer
-tzdata==2025.1
+    #   typing-inspection
 typing-inspection==0.4.0
    # via pydantic
 tzdata==2025.2
    # via pandas
-urllib3==2.2.3
+urllib3==2.4.0
    # via requests
-wrapt==1.16.0
+wrapt==1.17.2
    # via
    #   deprecated
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-grpc
 xxhash==3.5.0
    # via datasets
-yarl==1.18.3
+yarl==1.20.0
    # via aiohttp
-zipp==3.20.2
+zipp==3.21.0
    # via importlib-metadata
--- a/server/requirements_gen.txt
+++ b/server/requirements_gen.txt
@ -1,33 +1,35 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile pyproject.toml --extra gen -o requirements_gen.txt --python-version 3.11
-certifi==2025.1.31
+certifi==2025.4.26
    # via requests
-charset-normalizer==3.4.1
+charset-normalizer==3.4.2
    # via requests
 click==8.1.8
-    # via typer
+    # via
    #   text-generation-server (pyproject.toml)
    #   typer
 deprecated==1.2.18
    # via
    #   opentelemetry-api
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
    #   opentelemetry-semantic-conventions
-einops==0.8.0
+einops==0.8.1
    # via text-generation-server (pyproject.toml)
-filelock==3.17.0
+filelock==3.18.0
    # via
    #   huggingface-hub
    #   transformers
-fsspec==2025.2.0
+fsspec==2025.3.2
    # via huggingface-hub
-googleapis-common-protos==1.66.0
+googleapis-common-protos==1.70.0
    # via
    #   grpcio-status
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
 grpc-interceptor==0.15.4
    # via text-generation-server (pyproject.toml)
-grpcio==1.70.0
+grpcio==1.71.0
    # via
    #   text-generation-server (pyproject.toml)
    #   grpc-interceptor
@ -35,22 +37,30 @@ grpcio==1.70.0
    #   grpcio-status
    #   grpcio-tools
    #   opentelemetry-exporter-otlp-proto-grpc
-grpcio-reflection==1.70.0
+grpcio-reflection==1.71.0
    # via text-generation-server (pyproject.toml)
-grpcio-status==1.70.0
+grpcio-status==1.71.0
    # via text-generation-server (pyproject.toml)
-grpcio-tools==1.70.0
+grpcio-tools==1.71.0
    # via text-generation-server (pyproject.toml)
 hf-transfer==0.1.9
    # via text-generation-server (pyproject.toml)
-huggingface-hub==0.28.1
+hf-xet==1.1.0
    # via
    #   text-generation-server (pyproject.toml)
    #   huggingface-hub
 huggingface-hub==0.31.1
    # via
    #   text-generation-server (pyproject.toml)
    #   kernels
    #   tokenizers
    #   transformers
 idna==3.10
    # via requests
-importlib-metadata==8.5.0
+importlib-metadata==8.6.1
    # via opentelemetry-api
 kernels==0.5.0
    # via text-generation-server (pyproject.toml)
 loguru==0.7.3
    # via text-generation-server (pyproject.toml)
 markdown-it-py==3.0.0
@ -59,12 +69,12 @@ mdurl==0.1.2
    # via markdown-it-py
 mypy-protobuf==3.6.0
    # via text-generation-server (pyproject.toml)
-numpy==2.2.2
+numpy==2.2.5
    # via
    #   text-generation-server (pyproject.toml)
    #   scipy
    #   transformers
-opentelemetry-api==1.30.0
+opentelemetry-api==1.33.0
    # via
    #   text-generation-server (pyproject.toml)
    #   opentelemetry-exporter-otlp-proto-grpc
@ -73,44 +83,45 @@ opentelemetry-api==1.30.0
    #   opentelemetry-instrumentation-grpc
    #   opentelemetry-sdk
    #   opentelemetry-semantic-conventions
-opentelemetry-exporter-otlp==1.30.0
+opentelemetry-exporter-otlp==1.33.0
    # via text-generation-server (pyproject.toml)
-opentelemetry-exporter-otlp-proto-common==1.30.0
+opentelemetry-exporter-otlp-proto-common==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-exporter-otlp-proto-grpc==1.30.0
+opentelemetry-exporter-otlp-proto-grpc==1.33.0
    # via opentelemetry-exporter-otlp
-opentelemetry-exporter-otlp-proto-http==1.30.0
+opentelemetry-exporter-otlp-proto-http==1.33.0
    # via opentelemetry-exporter-otlp
-opentelemetry-instrumentation==0.51b0
+opentelemetry-instrumentation==0.54b0
    # via opentelemetry-instrumentation-grpc
-opentelemetry-instrumentation-grpc==0.51b0
+opentelemetry-instrumentation-grpc==0.54b0
    # via text-generation-server (pyproject.toml)
-opentelemetry-proto==1.30.0
+opentelemetry-proto==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-common
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-sdk==1.30.0
+opentelemetry-sdk==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-semantic-conventions==0.51b0
+opentelemetry-semantic-conventions==0.54b0
    # via
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-grpc
    #   opentelemetry-sdk
-packaging==24.2
+packaging==25.0
    # via
    #   huggingface-hub
    #   kernels
    #   opentelemetry-instrumentation
    #   transformers
-pillow==11.1.0
+pillow==11.2.1
    # via text-generation-server (pyproject.toml)
 prometheus-client==0.21.1
    # via text-generation-server (pyproject.toml)
-protobuf==5.29.3
+protobuf==5.29.4
    # via
    #   text-generation-server (pyproject.toml)
    #   googleapis-common-protos
@ -134,23 +145,23 @@ requests==2.32.3
    #   huggingface-hub
    #   opentelemetry-exporter-otlp-proto-http
    #   transformers
-rich==13.9.4
+rich==14.0.0
    # via
    #   text-generation-server (pyproject.toml)
    #   typer
-safetensors==0.5.2
+safetensors==0.5.3
    # via
    #   text-generation-server (pyproject.toml)
    #   transformers
-scipy==1.15.1
+scipy==1.15.3
    # via text-generation-server (pyproject.toml)
 sentencepiece==0.2.0
    # via text-generation-server (pyproject.toml)
-setuptools==75.8.0
+setuptools==80.4.0
    # via grpcio-tools
 shellingham==1.5.4
    # via typer
-tokenizers==0.21.0
+tokenizers==0.21.1
    # via
    #   text-generation-server (pyproject.toml)
    #   transformers
@ -158,18 +169,18 @@ tqdm==4.67.1
    # via
    #   huggingface-hub
    #   transformers
-transformers==4.49
+transformers==4.51.3
    # via text-generation-server (pyproject.toml)
-typer==0.15.1
+typer==0.15.3
    # via text-generation-server (pyproject.toml)
-types-protobuf==5.29.1.20241207
+types-protobuf==6.30.2.20250506
    # via mypy-protobuf
-typing-extensions==4.12.2
+typing-extensions==4.13.2
    # via
    #   huggingface-hub
    #   opentelemetry-sdk
    #   typer
-urllib3==2.3.0
+urllib3==2.4.0
    # via requests
 wrapt==1.17.2
    # via
--- a/server/requirements_intel.txt
+++ b/server/requirements_intel.txt
@ -1,39 +1,41 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_intel.txt --python-version 3.11
-accelerate==1.3.0
+accelerate==1.6.0
    # via
    #   text-generation-server (pyproject.toml)
    #   peft
-aiohappyeyeballs==2.4.4
+aiohappyeyeballs==2.6.1
    # via aiohttp
-aiohttp==3.11.11
+aiohttp==3.11.18
    # via
    #   datasets
    #   fsspec
 aiosignal==1.3.2
    # via aiohttp
-airportsdata==20241001
+airportsdata==20250224
    # via outlines
 annotated-types==0.7.0
    # via pydantic
-attrs==25.1.0
+attrs==25.3.0
    # via
    #   aiohttp
    #   jsonschema
    #   referencing
-certifi==2024.8.30
+certifi==2025.4.26
    # via requests
-charset-normalizer==3.4.0
+charset-normalizer==3.4.2
    # via requests
-click==8.1.7
+click==8.1.8
-    # via typer
+    # via
    #   text-generation-server (pyproject.toml)
    #   typer
 cloudpickle==3.1.1
    # via outlines
-compressed-tensors==0.9.1
+compressed-tensors==0.9.4
    # via text-generation-server (pyproject.toml)
 datasets==2.21.0
    # via text-generation-server (pyproject.toml)
-deprecated==1.2.14
+deprecated==1.2.18
    # via
    #   opentelemetry-api
    #   opentelemetry-exporter-otlp-proto-grpc
@ -45,15 +47,15 @@ dill==0.3.8
    #   multiprocess
 diskcache==5.6.3
    # via outlines
-einops==0.8.0
+einops==0.8.1
    # via text-generation-server (pyproject.toml)
-filelock==3.16.1
+filelock==3.18.0
    # via
    #   datasets
    #   huggingface-hub
    #   torch
    #   transformers
-frozenlist==1.5.0
+frozenlist==1.6.0
    # via
    #   aiohttp
    #   aiosignal
@ -64,30 +66,36 @@ fsspec==2024.6.1
    #   torch
 genson==1.3.0
    # via outlines
-googleapis-common-protos==1.65.0
+googleapis-common-protos==1.70.0
    # via
    #   grpcio-status
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
 grpc-interceptor==0.15.4
    # via text-generation-server (pyproject.toml)
-grpcio==1.68.0
+grpcio==1.71.0
    # via
    #   text-generation-server (pyproject.toml)
    #   grpc-interceptor
    #   grpcio-reflection
    #   grpcio-status
    #   opentelemetry-exporter-otlp-proto-grpc
-grpcio-reflection==1.68.0
+grpcio-reflection==1.71.0
    # via text-generation-server (pyproject.toml)
-grpcio-status==1.68.0
+grpcio-status==1.71.0
    # via text-generation-server (pyproject.toml)
-hf-transfer==0.1.8
+hf-transfer==0.1.9
    # via text-generation-server (pyproject.toml)
-huggingface-hub==0.28.1
+hf-xet==1.1.0
    # via
    #   text-generation-server (pyproject.toml)
    #   huggingface-hub
 huggingface-hub==0.31.1
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
    #   datasets
    #   kernels
    #   peft
    #   tokenizers
    #   transformers
@ -95,13 +103,15 @@ idna==3.10
    # via
    #   requests
    #   yarl
-importlib-metadata==7.1.0
+importlib-metadata==8.6.1
    # via opentelemetry-api
 interegular==0.3.3
    # via
    #   outlines
    #   outlines-core
-jinja2==3.1.5
+iso3166==2.1.1
    # via outlines
 jinja2==3.1.6
    # via
    #   outlines
    #   torch
@ -109,8 +119,10 @@ jsonschema==4.23.0
    # via
    #   outlines
    #   outlines-core
-jsonschema-specifications==2024.10.1
+jsonschema-specifications==2025.4.1
    # via jsonschema
 kernels==0.5.0
    # via text-generation-server (pyproject.toml)
 lark==1.2.2
    # via outlines
 loguru==0.7.3
@ -123,7 +135,7 @@ mdurl==0.1.2
    # via markdown-it-py
 mpmath==1.3.0
    # via sympy
-multidict==6.1.0
+multidict==6.4.3
    # via
    #   aiohttp
    #   yarl
@ -133,7 +145,7 @@ nest-asyncio==1.6.0
    # via outlines
 networkx==3.4.2
    # via torch
-numpy==1.26.4
+numpy==2.2.5
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
@ -143,41 +155,44 @@ numpy==1.26.4
    #   peft
    #   scipy
    #   transformers
-nvidia-cublas-cu12==12.4.5.8
+nvidia-cublas-cu12==12.6.4.1
    # via
    #   nvidia-cudnn-cu12
    #   nvidia-cusolver-cu12
    #   torch
-nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-cupti-cu12==12.6.80
    # via torch
-nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.6.77
    # via torch
-nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cuda-runtime-cu12==12.6.77
    # via torch
-nvidia-cudnn-cu12==9.1.0.70
+nvidia-cudnn-cu12==9.5.1.17
    # via torch
-nvidia-cufft-cu12==11.2.1.3
+nvidia-cufft-cu12==11.3.0.4
    # via torch
-nvidia-curand-cu12==10.3.5.147
+nvidia-cufile-cu12==1.11.1.6
    # via torch
-nvidia-cusolver-cu12==11.6.1.9
+nvidia-curand-cu12==10.3.7.77
    # via torch
-nvidia-cusparse-cu12==12.3.1.170
+nvidia-cusolver-cu12==11.7.1.2
    # via torch
 nvidia-cusparse-cu12==12.5.4.2
    # via
    #   nvidia-cusolver-cu12
    #   torch
-nvidia-cusparselt-cu12==0.6.2
+nvidia-cusparselt-cu12==0.6.3
    # via torch
-nvidia-nccl-cu12==2.21.5
+nvidia-nccl-cu12==2.26.2
    # via torch
-nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvjitlink-cu12==12.6.85
    # via
    #   nvidia-cufft-cu12
    #   nvidia-cusolver-cu12
    #   nvidia-cusparse-cu12
    #   torch
-nvidia-nvtx-cu12==12.4.127
+nvidia-nvtx-cu12==12.6.77
    # via torch
-opentelemetry-api==1.30.0
+opentelemetry-api==1.33.0
    # via
    #   text-generation-server (pyproject.toml)
    #   opentelemetry-exporter-otlp-proto-grpc
@ -186,86 +201,85 @@ opentelemetry-api==1.30.0
    #   opentelemetry-instrumentation-grpc
    #   opentelemetry-sdk
    #   opentelemetry-semantic-conventions
-opentelemetry-exporter-otlp==1.30.0
+opentelemetry-exporter-otlp==1.33.0
    # via text-generation-server (pyproject.toml)
-opentelemetry-exporter-otlp-proto-common==1.30.0
+opentelemetry-exporter-otlp-proto-common==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-exporter-otlp-proto-grpc==1.30.0
+opentelemetry-exporter-otlp-proto-grpc==1.33.0
    # via opentelemetry-exporter-otlp
-opentelemetry-exporter-otlp-proto-http==1.30.0
+opentelemetry-exporter-otlp-proto-http==1.33.0
    # via opentelemetry-exporter-otlp
-opentelemetry-instrumentation==0.51b0
+opentelemetry-instrumentation==0.54b0
    # via opentelemetry-instrumentation-grpc
-opentelemetry-instrumentation-grpc==0.51b0
+opentelemetry-instrumentation-grpc==0.54b0
    # via text-generation-server (pyproject.toml)
-opentelemetry-proto==1.30.0
+opentelemetry-proto==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-common
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-sdk==1.30.0
+opentelemetry-sdk==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-semantic-conventions==0.51b0
+opentelemetry-semantic-conventions==0.54b0
    # via
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-grpc
    #   opentelemetry-sdk
-outlines==0.1.14
+outlines==0.2.3
    # via text-generation-server (pyproject.toml)
 outlines-core==0.1.26
    # via outlines
-packaging==24.1
+packaging==25.0
    # via
    #   accelerate
    #   datasets
    #   huggingface-hub
    #   kernels
    #   opentelemetry-instrumentation
    #   peft
    #   transformers
 pandas==2.2.3
    # via datasets
-peft==0.14.0
+peft==0.15.2
    # via text-generation-server (pyproject.toml)
-pillow==11.1.0
+pillow==11.2.1
    # via text-generation-server (pyproject.toml)
 prometheus-client==0.21.1
    # via text-generation-server (pyproject.toml)
-propcache==0.2.1
+propcache==0.3.1
    # via
    #   aiohttp
    #   yarl
-protobuf==5.29.3
+protobuf==5.29.4
    # via
    #   text-generation-server (pyproject.toml)
    #   googleapis-common-protos
    #   grpcio-reflection
    #   grpcio-status
    #   opentelemetry-proto
-psutil==6.1.1
+psutil==7.0.0
    # via
    #   accelerate
    #   peft
 py-cpuinfo==9.0.0
    # via text-generation-server (pyproject.toml)
-pyarrow==19.0.0
+pyarrow==20.0.0
    # via datasets
-pycountry==24.6.1
+pydantic==2.11.4
    # via outlines
 pydantic==2.10.6
    # via
    #   compressed-tensors
    #   outlines
-pydantic-core==2.27.2
+pydantic-core==2.33.2
    # via pydantic
-pygments==2.18.0
+pygments==2.19.1
    # via rich
 python-dateutil==2.9.0.post0
    # via pandas
-pytz==2025.1
+pytz==2025.2
    # via pandas
 pyyaml==6.0.2
    # via
@ -279,7 +293,7 @@ referencing==0.36.2
    #   jsonschema
    #   jsonschema-specifications
    #   outlines
-regex==2024.9.11
+regex==2024.11.6
    # via transformers
 requests==2.32.3
    # via
@ -288,59 +302,61 @@ requests==2.32.3
    #   opentelemetry-exporter-otlp-proto-http
    #   outlines
    #   transformers
-rich==13.9.4
+rich==14.0.0
    # via
    #   text-generation-server (pyproject.toml)
    #   typer
-rpds-py==0.22.3
+rpds-py==0.24.0
    # via
    #   jsonschema
    #   referencing
-safetensors==0.4.5
+safetensors==0.5.3
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
    #   peft
    #   transformers
-scipy==1.13.1
+scipy==1.15.3
    # via text-generation-server (pyproject.toml)
 sentencepiece==0.2.0
    # via text-generation-server (pyproject.toml)
 setuptools==80.4.0
    # via triton
 shellingham==1.5.4
    # via typer
 six==1.17.0
    # via python-dateutil
-sympy==1.13.1
+sympy==1.14.0
    # via torch
 texttable==1.7.0
    # via text-generation-server (pyproject.toml)
-tokenizers==0.21.0
+tokenizers==0.21.1
    # via
    #   text-generation-server (pyproject.toml)
    #   transformers
-torch==2.6.0
+torch==2.7.0
    # via
    #   accelerate
    #   compressed-tensors
    #   outlines
    #   peft
-tqdm==4.66.5
+tqdm==4.67.1
    # via
    #   datasets
    #   huggingface-hub
    #   outlines
    #   peft
    #   transformers
-transformers==4.49
+transformers==4.51.3
    # via
    #   text-generation-server (pyproject.toml)
    #   compressed-tensors
    #   peft
-triton==3.2.0
+triton==3.3.0
    # via torch
-typer==0.15.1
+typer==0.15.3
    # via text-generation-server (pyproject.toml)
-typing-extensions==4.12.2
+typing-extensions==4.13.2
    # via
    #   huggingface-hub
    #   opentelemetry-sdk
@ -350,18 +366,21 @@ typing-extensions==4.12.2
    #   referencing
    #   torch
    #   typer
-tzdata==2025.1
+    #   typing-inspection
 typing-inspection==0.4.0
    # via pydantic
 tzdata==2025.2
    # via pandas
-urllib3==2.2.3
+urllib3==2.4.0
    # via requests
-wrapt==1.16.0
+wrapt==1.17.2
    # via
    #   deprecated
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-grpc
 xxhash==3.5.0
    # via datasets
-yarl==1.18.3
+yarl==1.20.0
    # via aiohttp
-zipp==3.20.2
+zipp==3.21.0
    # via importlib-metadata
--- a/server/requirements_rocm.txt
+++ b/server/requirements_rocm.txt
@ -1,39 +1,41 @@
 # This file was autogenerated by uv via the following command:
 #    uv pip compile pyproject.toml --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines -o requirements_rocm.txt --python-version 3.11
-accelerate==1.3.0
+accelerate==1.6.0
    # via
    #   text-generation-server (pyproject.toml)
    #   peft
-aiohappyeyeballs==2.4.4
+aiohappyeyeballs==2.6.1
    # via aiohttp
-aiohttp==3.11.11
+aiohttp==3.11.18
    # via
    #   datasets
    #   fsspec
 aiosignal==1.3.2
    # via aiohttp
-airportsdata==20241001
+airportsdata==20250224
    # via outlines
 annotated-types==0.7.0
    # via pydantic
-attrs==25.1.0
+attrs==25.3.0
    # via
    #   aiohttp
    #   jsonschema
    #   referencing
-certifi==2024.8.30
+certifi==2025.4.26
    # via requests
-charset-normalizer==3.4.0
+charset-normalizer==3.4.2
    # via requests
-click==8.1.7
+click==8.1.8
-    # via typer
+    # via
    #   text-generation-server (pyproject.toml)
    #   typer
 cloudpickle==3.1.1
    # via outlines
-compressed-tensors==0.9.1
+compressed-tensors==0.9.4
    # via text-generation-server (pyproject.toml)
 datasets==2.21.0
    # via text-generation-server (pyproject.toml)
-deprecated==1.2.14
+deprecated==1.2.18
    # via
    #   opentelemetry-api
    #   opentelemetry-exporter-otlp-proto-grpc
@ -45,15 +47,15 @@ dill==0.3.8
    #   multiprocess
 diskcache==5.6.3
    # via outlines
-einops==0.8.0
+einops==0.8.1
    # via text-generation-server (pyproject.toml)
-filelock==3.16.1
+filelock==3.18.0
    # via
    #   datasets
    #   huggingface-hub
    #   torch
    #   transformers
-frozenlist==1.5.0
+frozenlist==1.6.0
    # via
    #   aiohttp
    #   aiosignal
@ -64,30 +66,36 @@ fsspec==2024.6.1
    #   torch
 genson==1.3.0
    # via outlines
-googleapis-common-protos==1.65.0
+googleapis-common-protos==1.70.0
    # via
    #   grpcio-status
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
 grpc-interceptor==0.15.4
    # via text-generation-server (pyproject.toml)
-grpcio==1.68.0
+grpcio==1.71.0
    # via
    #   text-generation-server (pyproject.toml)
    #   grpc-interceptor
    #   grpcio-reflection
    #   grpcio-status
    #   opentelemetry-exporter-otlp-proto-grpc
-grpcio-reflection==1.68.0
+grpcio-reflection==1.71.0
    # via text-generation-server (pyproject.toml)
-grpcio-status==1.68.0
+grpcio-status==1.71.0
    # via text-generation-server (pyproject.toml)
-hf-transfer==0.1.8
+hf-transfer==0.1.9
    # via text-generation-server (pyproject.toml)
-huggingface-hub==0.28.1
+hf-xet==1.1.0
    # via
    #   text-generation-server (pyproject.toml)
    #   huggingface-hub
 huggingface-hub==0.31.1
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
    #   datasets
    #   kernels
    #   peft
    #   tokenizers
    #   transformers
@ -95,13 +103,15 @@ idna==3.10
    # via
    #   requests
    #   yarl
-importlib-metadata==7.1.0
+importlib-metadata==8.6.1
    # via opentelemetry-api
 interegular==0.3.3
    # via
    #   outlines
    #   outlines-core
-jinja2==3.1.5
+iso3166==2.1.1
    # via outlines
 jinja2==3.1.6
    # via
    #   outlines
    #   torch
@ -109,8 +119,10 @@ jsonschema==4.23.0
    # via
    #   outlines
    #   outlines-core
-jsonschema-specifications==2024.10.1
+jsonschema-specifications==2025.4.1
    # via jsonschema
 kernels==0.5.0
    # via text-generation-server (pyproject.toml)
 lark==1.2.2
    # via outlines
 loguru==0.7.3
@ -123,7 +135,7 @@ mdurl==0.1.2
    # via markdown-it-py
 mpmath==1.3.0
    # via sympy
-multidict==6.1.0
+multidict==6.4.3
    # via
    #   aiohttp
    #   yarl
@ -133,7 +145,7 @@ nest-asyncio==1.6.0
    # via outlines
 networkx==3.4.2
    # via torch
-numpy==1.26.4
+numpy==2.2.5
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
@ -143,41 +155,44 @@ numpy==1.26.4
    #   peft
    #   scipy
    #   transformers
-nvidia-cublas-cu12==12.4.5.8
+nvidia-cublas-cu12==12.6.4.1
    # via
    #   nvidia-cudnn-cu12
    #   nvidia-cusolver-cu12
    #   torch
-nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cuda-cupti-cu12==12.6.80
    # via torch
-nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.6.77
    # via torch
-nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cuda-runtime-cu12==12.6.77
    # via torch
-nvidia-cudnn-cu12==9.1.0.70
+nvidia-cudnn-cu12==9.5.1.17
    # via torch
-nvidia-cufft-cu12==11.2.1.3
+nvidia-cufft-cu12==11.3.0.4
    # via torch
-nvidia-curand-cu12==10.3.5.147
+nvidia-cufile-cu12==1.11.1.6
    # via torch
-nvidia-cusolver-cu12==11.6.1.9
+nvidia-curand-cu12==10.3.7.77
    # via torch
-nvidia-cusparse-cu12==12.3.1.170
+nvidia-cusolver-cu12==11.7.1.2
    # via torch
 nvidia-cusparse-cu12==12.5.4.2
    # via
    #   nvidia-cusolver-cu12
    #   torch
-nvidia-cusparselt-cu12==0.6.2
+nvidia-cusparselt-cu12==0.6.3
    # via torch
-nvidia-nccl-cu12==2.21.5
+nvidia-nccl-cu12==2.26.2
    # via torch
-nvidia-nvjitlink-cu12==12.4.127
+nvidia-nvjitlink-cu12==12.6.85
    # via
    #   nvidia-cufft-cu12
    #   nvidia-cusolver-cu12
    #   nvidia-cusparse-cu12
    #   torch
-nvidia-nvtx-cu12==12.4.127
+nvidia-nvtx-cu12==12.6.77
    # via torch
-opentelemetry-api==1.30.0
+opentelemetry-api==1.33.0
    # via
    #   text-generation-server (pyproject.toml)
    #   opentelemetry-exporter-otlp-proto-grpc
@ -186,86 +201,85 @@ opentelemetry-api==1.30.0
    #   opentelemetry-instrumentation-grpc
    #   opentelemetry-sdk
    #   opentelemetry-semantic-conventions
-opentelemetry-exporter-otlp==1.30.0
+opentelemetry-exporter-otlp==1.33.0
    # via text-generation-server (pyproject.toml)
-opentelemetry-exporter-otlp-proto-common==1.30.0
+opentelemetry-exporter-otlp-proto-common==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-exporter-otlp-proto-grpc==1.30.0
+opentelemetry-exporter-otlp-proto-grpc==1.33.0
    # via opentelemetry-exporter-otlp
-opentelemetry-exporter-otlp-proto-http==1.30.0
+opentelemetry-exporter-otlp-proto-http==1.33.0
    # via opentelemetry-exporter-otlp
-opentelemetry-instrumentation==0.51b0
+opentelemetry-instrumentation==0.54b0
    # via opentelemetry-instrumentation-grpc
-opentelemetry-instrumentation-grpc==0.51b0
+opentelemetry-instrumentation-grpc==0.54b0
    # via text-generation-server (pyproject.toml)
-opentelemetry-proto==1.30.0
+opentelemetry-proto==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-common
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-sdk==1.30.0
+opentelemetry-sdk==1.33.0
    # via
    #   opentelemetry-exporter-otlp-proto-grpc
    #   opentelemetry-exporter-otlp-proto-http
-opentelemetry-semantic-conventions==0.51b0
+opentelemetry-semantic-conventions==0.54b0
    # via
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-grpc
    #   opentelemetry-sdk
-outlines==0.1.14
+outlines==0.2.3
    # via text-generation-server (pyproject.toml)
 outlines-core==0.1.26
    # via outlines
-packaging==24.1
+packaging==25.0
    # via
    #   accelerate
    #   datasets
    #   huggingface-hub
    #   kernels
    #   opentelemetry-instrumentation
    #   peft
    #   transformers
 pandas==2.2.3
    # via datasets
-peft==0.14.0
+peft==0.15.2
    # via text-generation-server (pyproject.toml)
-pillow==11.1.0
+pillow==11.2.1
    # via text-generation-server (pyproject.toml)
 prometheus-client==0.21.1
    # via text-generation-server (pyproject.toml)
-propcache==0.2.1
+propcache==0.3.1
    # via
    #   aiohttp
    #   yarl
-protobuf==5.29.3
+protobuf==5.29.4
    # via
    #   text-generation-server (pyproject.toml)
    #   googleapis-common-protos
    #   grpcio-reflection
    #   grpcio-status
    #   opentelemetry-proto
-psutil==6.1.1
+psutil==7.0.0
    # via
    #   accelerate
    #   peft
 py-cpuinfo==9.0.0
    # via text-generation-server (pyproject.toml)
-pyarrow==19.0.0
+pyarrow==20.0.0
    # via datasets
-pycountry==24.6.1
+pydantic==2.11.4
    # via outlines
 pydantic==2.10.6
    # via
    #   compressed-tensors
    #   outlines
-pydantic-core==2.27.2
+pydantic-core==2.33.2
    # via pydantic
-pygments==2.18.0
+pygments==2.19.1
    # via rich
 python-dateutil==2.9.0.post0
    # via pandas
-pytz==2025.1
+pytz==2025.2
    # via pandas
 pyyaml==6.0.2
    # via
@ -279,7 +293,7 @@ referencing==0.36.2
    #   jsonschema
    #   jsonschema-specifications
    #   outlines
-regex==2024.9.11
+regex==2024.11.6
    # via transformers
 requests==2.32.3
    # via
@ -288,59 +302,61 @@ requests==2.32.3
    #   opentelemetry-exporter-otlp-proto-http
    #   outlines
    #   transformers
-rich==13.9.4
+rich==14.0.0
    # via
    #   text-generation-server (pyproject.toml)
    #   typer
-rpds-py==0.22.3
+rpds-py==0.24.0
    # via
    #   jsonschema
    #   referencing
-safetensors==0.4.5
+safetensors==0.5.3
    # via
    #   text-generation-server (pyproject.toml)
    #   accelerate
    #   peft
    #   transformers
-scipy==1.13.1
+scipy==1.15.3
    # via text-generation-server (pyproject.toml)
 sentencepiece==0.2.0
    # via text-generation-server (pyproject.toml)
 setuptools==80.4.0
    # via triton
 shellingham==1.5.4
    # via typer
 six==1.17.0
    # via python-dateutil
-sympy==1.13.1
+sympy==1.14.0
    # via torch
 texttable==1.7.0
    # via text-generation-server (pyproject.toml)
-tokenizers==0.21.0
+tokenizers==0.21.1
    # via
    #   text-generation-server (pyproject.toml)
    #   transformers
-torch==2.6.0
+torch==2.7.0
    # via
    #   accelerate
    #   compressed-tensors
    #   outlines
    #   peft
-tqdm==4.66.5
+tqdm==4.67.1
    # via
    #   datasets
    #   huggingface-hub
    #   outlines
    #   peft
    #   transformers
-transformers==4.49
+transformers==4.51.3
    # via
    #   text-generation-server (pyproject.toml)
    #   compressed-tensors
    #   peft
-triton==3.2.0
+triton==3.3.0
    # via torch
-typer==0.15.1
+typer==0.15.3
    # via text-generation-server (pyproject.toml)
-typing-extensions==4.12.2
+typing-extensions==4.13.2
    # via
    #   huggingface-hub
    #   opentelemetry-sdk
@ -350,18 +366,21 @@ typing-extensions==4.12.2
    #   referencing
    #   torch
    #   typer
-tzdata==2025.1
+    #   typing-inspection
 typing-inspection==0.4.0
    # via pydantic
 tzdata==2025.2
    # via pandas
-urllib3==2.2.3
+urllib3==2.4.0
    # via requests
-wrapt==1.16.0
+wrapt==1.17.2
    # via
    #   deprecated
    #   opentelemetry-instrumentation
    #   opentelemetry-instrumentation-grpc
 xxhash==3.5.0
    # via datasets
-yarl==1.18.3
+yarl==1.20.0
    # via aiohttp
-zipp==3.20.2
+zipp==3.21.0
    # via importlib-metadata
--- a/server/uv.lock
+++ b/server/uv.lock
@ -2,13 +2,17 @@ version = 1
 revision = 1
 requires-python = ">=3.9"
 resolution-markers = [
-    "(python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
+    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
    "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version >= '3.12' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version == '3.11.*' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
    "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version == '3.11.*' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version == '3.10.*' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.10.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
    "python_full_version == '3.10.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version == '3.10.*' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
+    "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
    "python_full_version < '3.10' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version < '3.10' and sys_platform != 'linux' and sys_platform != 'win32'",
 ]
@ -24,8 +28,8 @@ dependencies = [
    { name = "psutil" },
    { name = "pyyaml" },
    { name = "safetensors" },
-    { name = "torch", version = "2.6.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
-    { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "torch", version = "2.7.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/85/15/0fab0260ab4069e5224e637d2e400538bb27b0dfc36f17daf68db9770d78/accelerate-1.3.0.tar.gz", hash = "sha256:518631c0adb80bd3d42fb29e7e2dc2256bcd7c786b0ba9119bbaa08611b36d9c", size = 342758 }
 wheels = [
@ -194,8 +198,8 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
    { name = "numpy", version = "2.2.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
-    { name = "torch", version = "2.6.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
-    { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "torch", version = "2.7.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/db/9d/9382259196d7ad7f3550702390081224e673a705e75b5660ee377b592fc0/bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl", hash = "sha256:ba3a720187f518b172ebce4081049c682ae3fd8284947e22499b256ff99a2bc3", size = 69680042 },
@ -321,8 +325,8 @@ version = "0.9.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "pydantic" },
-    { name = "torch", version = "2.6.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
-    { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "torch", version = "2.7.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
    { name = "transformers" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/40/e0/d9529aae2d2425d214e5a50497df4532d3f9e21c8d2023037c701f8a37d3/compressed-tensors-0.9.1.tar.gz", hash = "sha256:3cf5cd637f0186c184dd5bbbbf941356b1225199b49c6a45bf0909d65907f686", size = 63060 }
@ -833,8 +837,8 @@ dependencies = [
    { name = "huggingface-hub" },
    { name = "packaging" },
    { name = "tomli", marker = "python_full_version < '3.11'" },
-    { name = "torch", version = "2.6.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
-    { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "torch", version = "2.7.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/26/99/41af9dce502bb1682977fee1bc487a73fa8418cebbce16b8d27733947375/kernels-0.2.1.tar.gz", hash = "sha256:918942332819b28377b9d07070daddecfd8a5e7bab574dd3dc64a209ca6008b2", size = 9395 }
@ -1092,7 +1096,8 @@ name = "networkx"
 version = "3.2.1"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
+    "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
    "python_full_version < '3.10' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version < '3.10' and sys_platform != 'linux' and sys_platform != 'win32'",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/c4/80/a84676339aaae2f1cfdf9f418701dd634aef9cc76f708ef55c36ff39c3ca/networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6", size = 2073928 }
@ -1105,11 +1110,14 @@ name = "networkx"
 version = "3.4.2"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "(python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
+    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
    "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version >= '3.12' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version == '3.11.*' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
    "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version == '3.11.*' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version == '3.10.*' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.10.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
    "python_full_version == '3.10.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version == '3.10.*' and sys_platform != 'linux' and sys_platform != 'win32'",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/fd/1d/06475e1cd5264c0b870ea2cc6fdb3e37177c1e565c43f56ff17a10e3937f/networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1", size = 2151368 }
@ -1122,7 +1130,8 @@ name = "numpy"
 version = "2.0.2"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
+    "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
    "python_full_version < '3.10' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version < '3.10' and sys_platform != 'linux' and sys_platform != 'win32'",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a9/75/10dd1f8116a8b796cb2c737b674e02d02e80454bda953fa7e65d8c12b016/numpy-2.0.2.tar.gz", hash = "sha256:883c987dee1880e2a864ab0dc9892292582510604156762362d9326444636e78", size = 18902015 }
@ -1178,11 +1187,14 @@ name = "numpy"
 version = "2.2.2"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "(python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
+    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
    "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version >= '3.12' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version == '3.11.*' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
    "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version == '3.11.*' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version == '3.10.*' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.10.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
    "python_full_version == '3.10.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version == '3.10.*' and sys_platform != 'linux' and sys_platform != 'win32'",
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/ec/d0/c12ddfd3a02274be06ffc71f3efc6d0e457b0409c4481596881e748cb264/numpy-2.2.2.tar.gz", hash = "sha256:ed6906f61834d687738d25988ae117683705636936cc605be0bb208b23df4d8f", size = 20233295 }
@ -1245,120 +1257,128 @@ wheels = [
 [[package]]
 name = "nvidia-cublas-cu12"
-version = "12.4.5.8"
+version = "12.8.3.14"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ae/71/1c91302526c45ab494c23f61c7a84aa568b8c1f9d196efa5993957faf906/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b", size = 363438805 },
+    { url = "https://files.pythonhosted.org/packages/82/df/4b01f10069e23c641f116c62fc31e31e8dc361a153175d81561d15c8143b/nvidia_cublas_cu12-12.8.3.14-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:3f0e05e7293598cf61933258b73e66a160c27d59c4422670bf0b79348c04be44", size = 609620630 },
 ]
 [[package]]
 name = "nvidia-cuda-cupti-cu12"
-version = "12.4.127"
+version = "12.8.57"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/67/42/f4f60238e8194a3106d06a058d494b18e006c10bb2b915655bd9f6ea4cb1/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb", size = 13813957 },
+    { url = "https://files.pythonhosted.org/packages/39/6f/3683ecf4e38931971946777d231c2df00dd5c1c4c2c914c42ad8f9f4dca6/nvidia_cuda_cupti_cu12-12.8.57-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8e0b2eb847de260739bee4a3f66fac31378f4ff49538ff527a38a01a9a39f950", size = 10237547 },
 ]
 [[package]]
 name = "nvidia-cuda-nvrtc-cu12"
-version = "12.4.127"
+version = "12.8.61"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/2c/14/91ae57cd4db3f9ef7aa99f4019cfa8d54cb4caa7e00975df6467e9725a9f/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338", size = 24640306 },
+    { url = "https://files.pythonhosted.org/packages/d4/22/32029d4583f7b19cfe75c84399cbcfd23f2aaf41c66fc8db4da460104fff/nvidia_cuda_nvrtc_cu12-12.8.61-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a0fa9c2a21583105550ebd871bd76e2037205d56f33f128e69f6d2a55e0af9ed", size = 88024585 },
 ]
 [[package]]
 name = "nvidia-cuda-runtime-cu12"
-version = "12.4.127"
+version = "12.8.57"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ea/27/1795d86fe88ef397885f2e580ac37628ed058a92ed2c39dc8eac3adf0619/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5", size = 883737 },
+    { url = "https://files.pythonhosted.org/packages/16/f6/0e1ef31f4753a44084310ba1a7f0abaf977ccd810a604035abb43421c057/nvidia_cuda_runtime_cu12-12.8.57-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:75342e28567340b7428ce79a5d6bb6ca5ff9d07b69e7ce00d2c7b4dc23eff0be", size = 954762 },
 ]
 [[package]]
 name = "nvidia-cudnn-cu12"
-version = "9.1.0.70"
+version = "9.7.1.26"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or sys_platform == 'win32'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 },
+    { url = "https://files.pythonhosted.org/packages/25/dc/dc825c4b1c83b538e207e34f48f86063c88deaa35d46c651c7c181364ba2/nvidia_cudnn_cu12-9.7.1.26-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:6d011159a158f3cfc47bf851aea79e31bcff60d530b70ef70474c84cac484d07", size = 726851421 },
 ]
 [[package]]
 name = "nvidia-cufft-cu12"
-version = "11.2.1.3"
+version = "11.3.3.41"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or sys_platform == 'win32'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117 },
+    { url = "https://files.pythonhosted.org/packages/ac/26/b53c493c38dccb1f1a42e1a21dc12cba2a77fbe36c652f7726d9ec4aba28/nvidia_cufft_cu12-11.3.3.41-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:da650080ab79fcdf7a4b06aa1b460e99860646b176a43f6208099bdc17836b6a", size = 193118795 },
 ]
 [[package]]
 name = "nvidia-cufile-cu12"
 version = "1.13.0.11"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
    { url = "https://files.pythonhosted.org/packages/e5/9c/1f3264d0a84c8a031487fb7f59780fc78fa6f1c97776233956780e3dc3ac/nvidia_cufile_cu12-1.13.0.11-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:483f434c541806936b98366f6d33caef5440572de8ddf38d453213729da3e7d4", size = 1197801 },
 ]
 [[package]]
 name = "nvidia-curand-cu12"
-version = "10.3.5.147"
+version = "10.3.9.55"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/8a/6d/44ad094874c6f1b9c654f8ed939590bdc408349f137f9b98a3a23ccec411/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b", size = 56305206 },
+    { url = "https://files.pythonhosted.org/packages/bd/fc/7be5d0082507269bb04ac07cc614c84b78749efb96e8cf4100a8a1178e98/nvidia_curand_cu12-10.3.9.55-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8387d974240c91f6a60b761b83d4b2f9b938b7e0b9617bae0f0dafe4f5c36b86", size = 63618038 },
 ]
 [[package]]
 name = "nvidia-cusolver-cu12"
-version = "11.6.1.9"
+version = "11.7.2.55"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or sys_platform == 'win32'" },
-    { name = "nvidia-cusparse-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or sys_platform == 'win32'" },
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or sys_platform == 'win32'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057 },
+    { url = "https://files.pythonhosted.org/packages/c2/08/953675873a136d96bb12f93b49ba045d1107bc94d2551c52b12fa6c7dec3/nvidia_cusolver_cu12-11.7.2.55-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4d1354102f1e922cee9db51920dba9e2559877cf6ff5ad03a00d853adafb191b", size = 260373342 },
 ]
 [[package]]
 name = "nvidia-cusparse-cu12"
-version = "12.3.1.170"
+version = "12.5.7.53"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or sys_platform == 'win32'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763 },
+    { url = "https://files.pythonhosted.org/packages/c2/ab/31e8149c66213b846c082a3b41b1365b831f41191f9f40c6ddbc8a7d550e/nvidia_cusparse_cu12-12.5.7.53-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3c1b61eb8c85257ea07e9354606b26397612627fdcd327bfd91ccf6155e7c86d", size = 292064180 },
 ]
 [[package]]
 name = "nvidia-cusparselt-cu12"
-version = "0.6.2"
+version = "0.6.3"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/78/a8/bcbb63b53a4b1234feeafb65544ee55495e1bb37ec31b999b963cbccfd1d/nvidia_cusparselt_cu12-0.6.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:df2c24502fd76ebafe7457dbc4716b2fec071aabaed4fb7691a201cde03704d9", size = 150057751 },
+    { url = "https://files.pythonhosted.org/packages/3b/9a/72ef35b399b0e183bc2e8f6f558036922d453c4d8237dab26c666a04244b/nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46", size = 156785796 },
 ]
 [[package]]
 name = "nvidia-nccl-cu12"
-version = "2.21.5"
+version = "2.26.2"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/df/99/12cd266d6233f47d00daf3a72739872bdc10267d0383508b0b9c84a18bb6/nvidia_nccl_cu12-2.21.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:8579076d30a8c24988834445f8d633c697d42397e92ffc3f63fa26766d25e0a0", size = 188654414 },
+    { url = "https://files.pythonhosted.org/packages/67/ca/f42388aed0fddd64ade7493dbba36e1f534d4e6fdbdd355c6a90030ae028/nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6", size = 201319755 },
 ]
 [[package]]
 name = "nvidia-nvjitlink-cu12"
-version = "12.4.127"
+version = "12.8.61"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ff/ff/847841bacfbefc97a00036e0fce5a0f086b640756dc38caea5e1bb002655/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57", size = 21066810 },
+    { url = "https://files.pythonhosted.org/packages/03/f8/9d85593582bd99b8d7c65634d2304780aefade049b2b94d96e44084be90b/nvidia_nvjitlink_cu12-12.8.61-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:45fd79f2ae20bd67e8bc411055939049873bfd8fac70ff13bd4865e0b9bdab17", size = 39243473 },
 ]
 [[package]]
 name = "nvidia-nvtx-cu12"
-version = "12.4.127"
+version = "12.8.55"
 source = { registry = "https://pypi.org/simple" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 },
+    { url = "https://files.pythonhosted.org/packages/8d/cd/0e8c51b2ae3a58f054f2e7fe91b82d201abfb30167f2431e9bd92d532f42/nvidia_nvtx_cu12-12.8.55-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2dd0780f1a55c21d8e06a743de5bd95653de630decfff40621dbde78cc307102", size = 89896 },
 ]
 [[package]]
@ -1525,8 +1545,8 @@ dependencies = [
    { name = "pydantic" },
    { name = "referencing" },
    { name = "requests" },
-    { name = "torch", version = "2.6.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
-    { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "torch", version = "2.7.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
    { name = "tqdm" },
    { name = "typing-extensions" },
 ]
@ -1649,8 +1669,8 @@ dependencies = [
    { name = "psutil" },
    { name = "pyyaml" },
    { name = "safetensors" },
-    { name = "torch", version = "2.6.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
-    { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "torch", version = "2.7.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
    { name = "tqdm" },
    { name = "transformers" },
 ]
@ -2418,7 +2438,8 @@ name = "scipy"
 version = "1.13.1"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
+    "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
    "python_full_version < '3.10' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version < '3.10' and sys_platform != 'linux' and sys_platform != 'win32'",
 ]
 dependencies = [
@ -2457,11 +2478,14 @@ name = "scipy"
 version = "1.15.1"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
-    "(python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
+    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
    "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version >= '3.12' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version == '3.11.*' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
    "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version == '3.11.*' and sys_platform != 'linux' and sys_platform != 'win32'",
-    "(python_full_version == '3.10.*' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.10.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
    "python_full_version == '3.10.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version == '3.10.*' and sys_platform != 'linux' and sys_platform != 'win32'",
 ]
 dependencies = [
@ -2579,14 +2603,14 @@ wheels = [
 [[package]]
 name = "sympy"
-version = "1.13.1"
+version = "1.14.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "mpmath" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/ca/99/5a5b6f19ff9f083671ddf7b9632028436167cd3d33e11015754e41b249a4/sympy-1.13.1.tar.gz", hash = "sha256:9cebf7e04ff162015ce31c9c6c9144daa34a93bd082f54fd8f12deca4f47515f", size = 7533040 }
+sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8", size = 6189177 },
+    { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353 },
 ]
 [[package]]
@ -2594,6 +2618,7 @@ name = "text-generation-server"
 version = "2.0.5.dev0"
 source = { editable = "." }
 dependencies = [
    { name = "click" },
    { name = "einops" },
    { name = "grpc-interceptor" },
    { name = "grpcio" },
@ -2652,16 +2677,18 @@ quantize = [
    { name = "texttable" },
 ]
 torch = [
-    { name = "torch", version = "2.6.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
-    { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "torch", version = "2.7.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
-    { name = "torchvision", version = "0.21.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torchvision", version = "0.22.0", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
-    { name = "torchvision", version = "0.21.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
    { name = "torchvision", version = "0.22.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or sys_platform == 'win32'" },
 ]
 [package.metadata]
 requires-dist = [
    { name = "accelerate", marker = "extra == 'accelerate'", specifier = ">=1.2.1,<2" },
    { name = "bitsandbytes", marker = "extra == 'bnb'", specifier = ">=0.45.0" },
    { name = "click", specifier = "<8.2.0" },
    { name = "compressed-tensors", marker = "extra == 'compressed-tensors'", specifier = ">=0.9.0" },
    { name = "datasets", marker = "extra == 'quantize'", specifier = ">=2.21,<3" },
    { name = "einops", specifier = ">=0.8.0" },
@ -2694,10 +2721,10 @@ requires-dist = [
    { name = "sentencepiece", specifier = ">=0.2.0" },
    { name = "texttable", marker = "extra == 'quantize'", specifier = ">=1.6.7,<2" },
    { name = "tokenizers", specifier = ">=0.20.3" },
-    { name = "torch", marker = "(sys_platform == 'linux' and extra == 'torch') or (sys_platform == 'win32' and extra == 'torch')", specifier = "==2.6.0", index = "https://download.pytorch.org/whl/cu124" },
+    { name = "torch", marker = "(sys_platform == 'linux' and extra == 'torch') or (sys_platform == 'win32' and extra == 'torch')", specifier = "==2.7.0", index = "https://download.pytorch.org/whl/cu128" },
-    { name = "torch", marker = "sys_platform != 'linux' and sys_platform != 'win32' and extra == 'torch'", specifier = "==2.6.0" },
+    { name = "torch", marker = "sys_platform != 'linux' and sys_platform != 'win32' and extra == 'torch'", specifier = "==2.7.0" },
-    { name = "torchvision", marker = "(sys_platform == 'linux' and extra == 'torch') or (sys_platform == 'win32' and extra == 'torch')", specifier = "==0.21.0", index = "https://download.pytorch.org/whl/cu124" },
+    { name = "torchvision", marker = "(sys_platform == 'linux' and extra == 'torch') or (sys_platform == 'win32' and extra == 'torch')", specifier = "==0.22.0", index = "https://download.pytorch.org/whl/cu128" },
-    { name = "torchvision", marker = "sys_platform != 'linux' and sys_platform != 'win32' and extra == 'torch'", specifier = "==0.21.0" },
+    { name = "torchvision", marker = "sys_platform != 'linux' and sys_platform != 'win32' and extra == 'torch'", specifier = "==0.22.0" },
    { name = "transformers", specifier = ">=4.51.0" },
    { name = "typer", specifier = ">=0.15.1" },
 ]
@ -2778,7 +2805,7 @@ wheels = [
 [[package]]
 name = "torch"
-version = "2.6.0"
+version = "2.7.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version >= '3.12' and sys_platform != 'linux' and sys_platform != 'win32'",
@ -2797,22 +2824,27 @@ dependencies = [
    { name = "typing-extensions", marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/e5/16/ea1b7842413a7b8a5aaa5e99e8eaf3da3183cc3ab345ad025a07ff636301/torch-2.6.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:09e06f9949e1a0518c5b09fe95295bc9661f219d9ecb6f9893e5123e10696628", size = 66520221 },
+    { url = "https://files.pythonhosted.org/packages/dc/0b/b2b83f30b8e84a51bf4f96aa3f5f65fdf7c31c591cc519310942339977e2/torch-2.7.0-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:34e0168ed6de99121612d72224e59b2a58a83dae64999990eada7260c5dd582d", size = 68559462 },
-    { url = "https://files.pythonhosted.org/packages/0b/fa/f33a4148c6fb46ca2a3f8de39c24d473822d5774d652b66ed9b1214da5f7/torch-2.6.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:94fc63b3b4bedd327af588696559f68c264440e2503cc9e6954019473d74ae21", size = 66530713 },
+    { url = "https://files.pythonhosted.org/packages/aa/3f/85b56f7e2abcfa558c5fbf7b11eb02d78a4a63e6aeee2bbae3bb552abea5/torch-2.7.0-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:0a8d43caa342b9986101ec5feb5bbf1d86570b5caa01e9cb426378311258fdde", size = 68569377 },
-    { url = "https://files.pythonhosted.org/packages/81/b4/605ae4173aa37fb5aa14605d100ff31f4f5d49f617928c9f486bb3aaec08/torch-2.6.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:9a610afe216a85a8b9bc9f8365ed561535c93e804c2a317ef7fabcc5deda0989", size = 66532538 },
+    { url = "https://files.pythonhosted.org/packages/ee/8d/b2939e5254be932db1a34b2bd099070c509e8887e0c5a90c498a917e4032/torch-2.7.0-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:30b7688a87239a7de83f269333651d8e582afffce6f591fff08c046f7787296e", size = 68574294 },
-    { url = "https://files.pythonhosted.org/packages/88/8b/d60c0491ab63634763be1537ad488694d316ddc4a20eaadd639cedc53971/torch-2.6.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:ff96f4038f8af9f7ec4231710ed4549da1bdebad95923953a25045dcf6fd87e2", size = 66536783 },
+    { url = "https://files.pythonhosted.org/packages/28/fd/74ba6fde80e2b9eef4237fe668ffae302c76f0e4221759949a632ca13afa/torch-2.7.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:edad98dddd82220465b106506bb91ee5ce32bd075cddbcf2b443dfaa2cbd83bf", size = 68856166 },
-    { url = "https://files.pythonhosted.org/packages/b3/17/41f681b87290a1d2f1394f943e470f8b0b3c2987b7df8dc078d8831fce5b/torch-2.6.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:265f70de5fd45b864d924b64be1797f86e76c8e48a02c2a3a6fc7ec247d2226c", size = 66520446 },
+    { url = "https://files.pythonhosted.org/packages/90/48/7e6477cf40d48cc0a61fa0d41ee9582b9a316b12772fcac17bc1a40178e7/torch-2.7.0-cp313-none-macosx_11_0_arm64.whl", hash = "sha256:27f5007bdf45f7bb7af7f11d1828d5c2487e030690afb3d89a651fd7036a390e", size = 68575074 },
    { url = "https://files.pythonhosted.org/packages/85/11/571d6363d1aaee3033af46b40798a0238b24522e9b291b676446943cc8a9/torch-2.7.0-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:ccd7509141713997861b7a947ef0a717143cd7e9240addd168f38ba8fd23fd56", size = 68560465 },
 ]
 [[package]]
 name = "torch"
-version = "2.6.0+cu124"
+version = "2.7.0+cu128"
-source = { registry = "https://download.pytorch.org/whl/cu124" }
+source = { registry = "https://download.pytorch.org/whl/cu128" }
 resolution-markers = [
-    "(python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
+    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
-    "(python_full_version == '3.11.*' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
+    "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'",
-    "(python_full_version == '3.10.*' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
-    "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
+    "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "(python_full_version == '3.10.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
    "python_full_version == '3.10.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
    "python_full_version < '3.10' and platform_machine == 'aarch64' and sys_platform == 'linux'",
 ]
 dependencies = [
    { name = "filelock", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
@ -2826,6 +2858,7 @@ dependencies = [
    { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
@ -2835,26 +2868,58 @@ dependencies = [
    { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')" },
    { name = "sympy", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
-    { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "triton", marker = "sys_platform == 'linux'" },
    { name = "typing-extensions", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp310-cp310-linux_x86_64.whl", hash = "sha256:7f2ba7f7c0459320a521696f6b5bccc187f59890b23c9dfb6c49b0b87c6bfc97" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp310-cp310-manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp310-cp310-win_amd64.whl", hash = "sha256:7cc45c5b39d74875cfafe908b7f55c544147cc16b01e795feb2fe766583efe78" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ac1849553ee673dfafb44c610c60cb60a2890f0e117f43599a526cf777eb8b8c" },
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp311-cp311-linux_x86_64.whl", hash = "sha256:d4c3e9a8d31a7c0fcbb9da17c31a1917e1fac26c566a4cfbd8c9568ad7cade79" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp310-cp310-win_amd64.whl", hash = "sha256:c52c4b869742f00b12cb34521d1381be6119fa46244791704b00cc4a3cb06850" },
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp311-cp311-win_amd64.whl", hash = "sha256:6a1fb2714e9323f11edb6e8abf7aad5f79e45ad25c081cde87681a18d99c29eb" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp311-cp311-manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp312-cp312-linux_x86_64.whl", hash = "sha256:a393b506844035c0dac2f30ea8478c343b8e95a429f06f3b3cadfc7f53adb597" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:c4bbc0b4be60319ba1cefc90be9557b317f0b3c261eeceb96ca6e0343eec56bf" },
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp312-cp312-win_amd64.whl", hash = "sha256:3313061c1fec4c7310cf47944e84513dcd27b6173b72a349bb7ca68d0ee6e9c0" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:bf88f647d76d79da9556ca55df49e45aff1d66c12797886364343179dd09a36c" },
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp313-cp313-linux_x86_64.whl", hash = "sha256:0f3bc53c988ce9568cd876a2a5316761e84a8704135ec8068f5f81b4417979cb" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp312-cp312-manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp313-cp313-win_amd64.whl", hash = "sha256:519330eef09534acad8110b6f423d2fe58c1d8e9ada999ed077a637a0021f908" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:7c0f08d1c44a02abad389373dddfce75904b969a410be2f4e5109483dd3dc0ce" },
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp313-cp313t-linux_x86_64.whl", hash = "sha256:35cba404c0d742406cdcba1609085874bc60facdfbc50e910c47a92405fef44c" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:1704e5dd66c9221e4e8b6ae2d80cbf54e129571e643f5fa9ca78cc6d2096403a" },
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp39-cp39-linux_x86_64.whl", hash = "sha256:e661267cd0242462ab100bdd67f651988aa9f67eb31609d6909afcac891df612" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313-manylinux_2_28_aarch64.whl" },
-    { url = "https://download.pytorch.org/whl/cu124/torch-2.6.0%2Bcu124-cp39-cp39-win_amd64.whl", hash = "sha256:c2eb62b99161d87be486c88fd82441274cc892bce8c48dbc28c055cb147732ce" },
+    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d2f69f909da5dc52113ec66a851d62079f3d52c83184cf64beebdf12ca2f705c" },
    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:58c749f52ddc9098155c77d6c74153bb13d8978fd6e1063b5d7b41d4644f5af5" },
    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313t-manylinux_2_28_aarch64.whl" },
    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:78e13c26c38ae92d6841cf9ce760d7e9d52bca3e3183de371812e84274b054dc" },
    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:3559e98be824c2b12ab807319cd61c6174d73a524c9961317de8e8a44133c5c5" },
    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp39-cp39-manylinux_2_28_aarch64.whl" },
    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:f446f97b20cb070747b103fb640df941b88cb68c8d3b01538287d05d56a7e874" },
    { url = "https://download.pytorch.org/whl/cu128/torch-2.7.0%2Bcu128-cp39-cp39-win_amd64.whl", hash = "sha256:8614a167d6a163273fb130f586802f3243479862b53ee2843941c10cc5761da6" },
 ]
 [[package]]
 name = "torchvision"
-version = "0.21.0"
+version = "0.22.0"
 source = { registry = "https://download.pytorch.org/whl/cu128" }
 resolution-markers = [
    "python_full_version >= '3.12' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version == '3.10.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
    "python_full_version < '3.10' and platform_machine == 'aarch64' and sys_platform == 'linux'",
 ]
 dependencies = [
    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' and platform_machine == 'aarch64' and sys_platform == 'linux'" },
    { name = "numpy", version = "2.2.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' and platform_machine == 'aarch64' and sys_platform == 'linux'" },
    { name = "pillow", marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
    { name = "torch", version = "2.7.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "platform_machine == 'aarch64' and sys_platform == 'linux'" },
 ]
 wheels = [
    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:566224d7b4f00bc6366bed1d62f834ca80f8e57fe41e10e4a5636bfa3ffb984e" },
    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6be714bcdd8849549571f6acfaa2dfa9e00676f042bda517432745fb116f7904" },
    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6e9752b48c1cdd7f6428bcd30c3d198b30ecea348d16afb651f95035e5252506" },
    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:e4d4d5a14225875d9bf8c5221d43d8be97786adc498659493799bdeff52c54cf" },
    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:e50ff5bbae11f57fd3af8e6f2185c136f32e8b94324613428228dd27eba6a4f6" },
    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:7a398fad02f4ac6b7d18bea9a08dc14163ffc5a368618f29ceb0e53dfa91f69e" },
 ]
 [[package]]
 name = "torchvision"
 version = "0.22.0"
 source = { registry = "https://pypi.org/simple" }
 resolution-markers = [
    "python_full_version >= '3.12' and sys_platform != 'linux' and sys_platform != 'win32'",
@ -2866,43 +2931,46 @@ dependencies = [
    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10' and sys_platform != 'linux' and sys_platform != 'win32'" },
    { name = "numpy", version = "2.2.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' and sys_platform != 'linux' and sys_platform != 'win32'" },
    { name = "pillow", marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
-    { name = "torch", version = "2.6.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
+    { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "sys_platform != 'linux' and sys_platform != 'win32'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/8e/0d/143bd264876fad17c82096b6c2d433f1ac9b29cdc69ee45023096976ee3d/torchvision-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:044ea420b8c6c3162a234cada8e2025b9076fa82504758cd11ec5d0f8cd9fa37", size = 1784140 },
+    { url = "https://files.pythonhosted.org/packages/eb/03/a514766f068b088180f273913e539d08e830be3ae46ef8577ea62584a27c/torchvision-0.22.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:72256f1d7ff510b16c9fb4dd488584d0693f40c792f286a9620674438a81ccca", size = 1947829 },
-    { url = "https://files.pythonhosted.org/packages/29/88/00c69db213ee2443ada8886ec60789b227e06bb869d85ee324578221a7f7/torchvision-0.21.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:110d115333524d60e9e474d53c7d20f096dbd8a080232f88dddb90566f90064c", size = 1784141 },
+    { url = "https://files.pythonhosted.org/packages/b1/43/28bc858b022f6337326d75f4027d2073aad5432328f01ee1236d847f1b82/torchvision-0.22.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:191ea28321fc262d8aa1a7fe79c41ff2848864bf382f9f6ea45c41dde8313792", size = 1947828 },
-    { url = "https://files.pythonhosted.org/packages/6e/1b/28f527b22d5e8800184d0bc847f801ae92c7573a8c15979d92b7091c0751/torchvision-0.21.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:97a5814a93c793aaf0179cfc7f916024f4b63218929aee977b645633d074a49f", size = 1784140 },
+    { url = "https://files.pythonhosted.org/packages/cb/ea/887d1d61cf4431a46280972de665f350af1898ce5006cd046326e5d0a2f2/torchvision-0.22.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:31c3165418fe21c3d81fe3459e51077c2f948801b8933ed18169f54652796a0f", size = 1947826 },
-    { url = "https://files.pythonhosted.org/packages/f9/56/47d456b61c3bbce7bed4af3925c83d405bb87468e659fd3cf3d9840c3b51/torchvision-0.21.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:659b76c86757cb2ee4ca2db245e0740cfc3081fef46f0f1064d11adb4a8cee31", size = 1784141 },
+    { url = "https://files.pythonhosted.org/packages/e1/2a/9b34685599dcb341d12fc2730055155623db7a619d2415a8d31f17050952/torchvision-0.22.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ece17995857dd328485c9c027c0b20ffc52db232e30c84ff6c95ab77201112c5", size = 1947823 },
-    { url = "https://files.pythonhosted.org/packages/49/d5/d18c5d89cbe32015b033f1fa06918c7cdd5c0af0c03e55d72a3cc2d768f8/torchvision-0.21.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5c22caeaae8b3c36d93459f1a5294e6f43306cff856ed243189a229331a404b4", size = 1784154 },
+    { url = "https://files.pythonhosted.org/packages/6f/a7/f43e9c8d13118b4ffbaebea664c9338ab20fa115a908125afd2238ff16e7/torchvision-0.22.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:cdc96daa4658b47ce9384154c86ed1e70cba9d972a19f5de6e33f8f94a626790", size = 2137621 },
    { url = "https://files.pythonhosted.org/packages/3a/6e/eb662050a22a75a85b3b5e5f33dddfdc487c10ffcd20b82a8c2a4a6cd56c/torchvision-0.22.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2ef38a397f1b9cf62846fb20659cb99101f9d361de8c45d79284ee45c6f40d50", size = 1947880 },
 ]
 [[package]]
 name = "torchvision"
-version = "0.21.0+cu124"
+version = "0.22.0+cu128"
-source = { registry = "https://download.pytorch.org/whl/cu124" }
+source = { registry = "https://download.pytorch.org/whl/cu128" }
 resolution-markers = [
-    "(python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
+    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'win32')",
-    "(python_full_version == '3.11.*' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform == 'win32')",
-    "(python_full_version == '3.10.*' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
+    "(python_full_version == '3.10.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'win32')",
-    "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
+    "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')",
 ]
 dependencies = [
-    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')" },
+    { name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'win32')" },
-    { name = "numpy", version = "2.2.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and sys_platform == 'linux') or (python_full_version >= '3.10' and sys_platform == 'win32')" },
+    { name = "numpy", version = "2.2.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.10' and sys_platform == 'win32')" },
-    { name = "pillow", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "pillow", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or sys_platform == 'win32'" },
-    { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" }, marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
+    { name = "torch", version = "2.7.0+cu128", source = { registry = "https://download.pytorch.org/whl/cu128" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or sys_platform == 'win32'" },
 ]
 wheels = [
-    { url = "https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp310-cp310-linux_x86_64.whl", hash = "sha256:3d3e74018eaa7837c73e3764dad3b7792b7544401c25a42977e9744303731bd3" },
+    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:59df5a550113a80ce523047066eaaedb168c69482da88c3ab246716ab45ba092" },
-    { url = "https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp310-cp310-win_amd64.whl", hash = "sha256:0c6aefb70ab2b312065240c804e459ac7b0e449867afd469b38d2fd47f9391a7" },
+    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp310-cp310-win_amd64.whl", hash = "sha256:cdd90b768b01b0d638cb06a6c211b550b275c0c207b5210b7cbb5cea8dde11db" },
-    { url = "https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp311-cp311-linux_x86_64.whl", hash = "sha256:137376805aca5ba57bd2c7a3ecb8569df961dbe82b128aac9b3b0a7125ef9385" },
+    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:f3ac527d58b4c2043eb8d9e29fc56cd1751f36f2aaa6dc75e34ec54c951bcb9c" },
-    { url = "https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp311-cp311-win_amd64.whl", hash = "sha256:000a013584ad2304ab30496318145f284ac364622addb5ee3a5abd2769ba146f" },
+    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp311-cp311-win_amd64.whl", hash = "sha256:f5dae1307c34813425c0b753530c035e1cc72af0bded395d1ba64dcb2872889f" },
-    { url = "https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp312-cp312-linux_x86_64.whl", hash = "sha256:efb53ea0af7bf09b7b53e2a18b9be6d245f7d46a90b51d5cf97f37e9b929a991" },
+    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:06c101f40e1ff94869be14487c91fd5352e376f202fdeafb8f53c58cee2fbeb5" },
-    { url = "https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp312-cp312-win_amd64.whl", hash = "sha256:ec63c2ee792757492da40590e34b14f2fceda29050558c215f0c1f3b08149c0f" },
+    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp312-cp312-win_amd64.whl", hash = "sha256:a87393c86649b7e56b4bf859fe95922ee6ec1c1f3b430246fb1a5b51f8aee37a" },
-    { url = "https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp313-cp313-linux_x86_64.whl", hash = "sha256:4b70acf3b4b96a0ceb1374116626c9bef9e8be016b57b1284e482260ca1896d6" },
+    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:ee4fa6d4052d9ae25c1233289947fbfa4b88d23710254ab1772b108c1fc5fb4d" },
-    { url = "https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp313-cp313-win_amd64.whl", hash = "sha256:8fcf55321b206de70ff8e01c884fa42e57a60b1cb749341b96e0f22c8a7c9ec7" },
+    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp313-cp313-win_amd64.whl", hash = "sha256:17d50ffb1df6320da16b85395f1078bf369250ea144f3bb405088aca3d5f030f" },
-    { url = "https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp39-cp39-linux_x86_64.whl", hash = "sha256:6afb21a22f5497e08ea4dbd4544472330d8249bf09dafd239302552cad6906b2" },
+    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:209c29d78cf2003cf4e22c9b651790f57171334998ee3125594d130526aeaa50" },
-    { url = "https://download.pytorch.org/whl/cu124/torchvision-0.21.0%2Bcu124-cp39-cp39-win_amd64.whl", hash = "sha256:579b6a7fffc34a860c57a7131221ef125831f5961431f8da15760ab1ef752d44" },
+    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp313-cp313t-win_amd64.whl", hash = "sha256:03b454b867f7a0aa9861a463042141448c4f15bec784def19eed39a57fac217b" },
    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:c92a353ff82db3312644b5b26d410b586b72969b535948d584c247569f75605c" },
    { url = "https://download.pytorch.org/whl/cu128/torchvision-0.22.0%2Bcu128-cp39-cp39-win_amd64.whl", hash = "sha256:90a0dacad36b1ea8de912af8583cbe780b4a1bdf9cb85870fe548fdec212ab31" },
 ]
 [[package]]
@ -2941,19 +3009,23 @@ wheels = [
 [[package]]
 name = "triton"
-version = "3.2.0"
+version = "3.3.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "setuptools", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
 ]
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/01/65/3ffa90e158a2c82f0716eee8d26a725d241549b7d7aaf7e4f44ac03ebd89/triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3e54983cd51875855da7c68ec05c05cf8bb08df361b1d5b69e05e40b0c9bd62", size = 253090354 },
+    { url = "https://files.pythonhosted.org/packages/76/04/d54d3a6d077c646624dc9461b0059e23fd5d30e0dbe67471e3654aec81f9/triton-3.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fad99beafc860501d7fcc1fb7045d9496cbe2c882b1674640304949165a916e7", size = 156441993 },
-    { url = "https://files.pythonhosted.org/packages/a7/2e/757d2280d4fefe7d33af7615124e7e298ae7b8e3bc4446cdb8e88b0f9bab/triton-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8009a1fb093ee8546495e96731336a33fb8856a38e45bb4ab6affd6dbc3ba220", size = 253157636 },
+    { url = "https://files.pythonhosted.org/packages/3c/c5/4874a81131cc9e934d88377fbc9d24319ae1fb540f3333b4e9c696ebc607/triton-3.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3161a2bf073d6b22c4e2f33f951f3e5e3001462b2570e6df9cd57565bdec2984", size = 156528461 },
-    { url = "https://files.pythonhosted.org/packages/06/00/59500052cb1cf8cf5316be93598946bc451f14072c6ff256904428eaf03c/triton-3.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d9b215efc1c26fa7eefb9a157915c92d52e000d2bf83e5f69704047e63f125c", size = 253159365 },
+    { url = "https://files.pythonhosted.org/packages/11/53/ce18470914ab6cfbec9384ee565d23c4d1c55f0548160b1c7b33000b11fd/triton-3.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b68c778f6c4218403a6bd01be7484f6dc9e20fe2083d22dd8aef33e3b87a10a3", size = 156504509 },
-    { url = "https://files.pythonhosted.org/packages/c7/30/37a3384d1e2e9320331baca41e835e90a3767303642c7a80d4510152cbcf/triton-3.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e5dfa23ba84541d7c0a531dfce76d8bcd19159d50a4a8b14ad01e91734a5c1b0", size = 253154278 },
+    { url = "https://files.pythonhosted.org/packages/7d/74/4bf2702b65e93accaa20397b74da46fb7a0356452c1bb94dbabaf0582930/triton-3.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47bc87ad66fa4ef17968299acacecaab71ce40a238890acc6ad197c3abe2b8f1", size = 156516468 },
-    { url = "https://files.pythonhosted.org/packages/bc/74/9f12bdedeb110242d8bb1bd621f6605e753ee0cbf73cf7f3a62b8173f190/triton-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:30ceed0eff2c4a73b14eb63e052992f44bbdf175f3fad21e1ac8097a772de7ee", size = 253057866 },
+    { url = "https://files.pythonhosted.org/packages/0a/93/f28a696fa750b9b608baa236f8225dd3290e5aff27433b06143adc025961/triton-3.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce4700fc14032af1e049005ae94ba908e71cd6c2df682239aed08e49bc71b742", size = 156580729 },
    { url = "https://files.pythonhosted.org/packages/f0/9c/315d25590fc309e2d28bb67953526238fac5d54548a16ceca992c76441bc/triton-3.3.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1f41403bfa0cbb3e24fd958ca7fee04e9681e55e539296db9aca30c42acae693", size = 156439372 },
 ]
 [[package]]
 name = "typer"
-version = "0.15.1"
+version = "0.15.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "click" },
@ -2961,9 +3033,9 @@ dependencies = [
    { name = "shellingham" },
    { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/cb/ce/dca7b219718afd37a0068f4f2530a727c2b74a8b6e8e0c0080a4c0de4fcd/typer-0.15.1.tar.gz", hash = "sha256:a0588c0a7fa68a1978a069818657778f86abe6ff5ea6abf472f940a08bfe4f0a", size = 99789 }
+sdist = { url = "https://files.pythonhosted.org/packages/98/1a/5f36851f439884bcfe8539f6a20ff7516e7b60f319bbaf69a90dc35cc2eb/typer-0.15.3.tar.gz", hash = "sha256:818873625d0569653438316567861899f7e9972f2e6e0c16dab608345ced713c", size = 101641 }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d0/cc/0a838ba5ca64dc832aa43f727bd586309846b0ffb2ce52422543e6075e8a/typer-0.15.1-py3-none-any.whl", hash = "sha256:7994fb7b8155b64d3402518560648446072864beefd44aa2dc36972a5972e847", size = 44908 },
+    { url = "https://files.pythonhosted.org/packages/48/20/9d953de6f4367163d23ec823200eb3ecb0050a2609691e512c8b95827a9b/typer-0.15.3-py3-none-any.whl", hash = "sha256:c86a65ad77ca531f03de08d1b9cb67cd09ad02ddddf4b34745b5008f43b239bd", size = 45253 },
 ]
 [[package]]