diff --git a/docs/source/supported_models.md b/docs/source/supported_models.md index 453b4f61a..f168fd76b 100644 --- a/docs/source/supported_models.md +++ b/docs/source/supported_models.md @@ -14,8 +14,8 @@ Text Generation Inference enables serving optimized models. The following sectio - [Gemma](https://huggingface.co/google/gemma-7b) - [PaliGemma](https://huggingface.co/google/paligemma-3b-pt-224) - [Gemma2](https://huggingface.co/collections/google/gemma-2-release-667d6600fd5220e7b967f315) -- [Gemma3](https://huggingface.co/collections/google/gemma-3) -- [Gemma3 Text](https://huggingface.co/collections/google/gemma-3) +- [Gemma3](https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d) +- [Gemma3 Text](https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d) - [Cohere](https://huggingface.co/CohereForAI/c4ai-command-r-plus) - [Dbrx](https://huggingface.co/databricks/dbrx-instruct) - [Mamba](https://huggingface.co/state-spaces/mamba-2.8b-slimpj) diff --git a/integration-tests/models/__snapshots__/test_flash_gemma3/test_exceed_window.json b/integration-tests/models/__snapshots__/test_flash_gemma3/test_exceed_window.json index ec8cd4f62..5c6b4cb94 100644 --- a/integration-tests/models/__snapshots__/test_flash_gemma3/test_exceed_window.json +++ b/integration-tests/models/__snapshots__/test_flash_gemma3/test_exceed_window.json @@ -1,133 +1,109 @@ { "details": { "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 20, + "finish_reason": "eos_token", + "generated_tokens": 16, "prefill": [], "seed": null, "tokens": [ + { + "id": 506, + "logprob": -1.3984375, + "special": false, + "text": " the" + }, + { + "id": 1331, + "logprob": -1.6953125, + "special": false, + "text": " people" + }, { "id": 236764, - "logprob": -0.44726562, + "logprob": -0.23535156, "special": false, "text": "," }, { - "id": 236743, - "logprob": -0.011413574, + "id": 532, + "logprob": -0.24316406, "special": false, - "text": " " + "text": " and" }, { - "id": 236812, - "logprob": -0.09814453, + "id": 506, + "logprob": -0.12109375, "special": false, - "text": "4" + "text": " the" }, { - "id": 236764, - "logprob": -0.044189453, + "id": 2780, + "logprob": -1.1640625, "special": false, - "text": "," + "text": " food" }, { - "id": 236743, - "logprob": -0.15625, + "id": 236761, + "logprob": -0.21386719, "special": false, - "text": " " + "text": "." }, { - "id": 236810, - "logprob": -0.010864258, + "id": 108, + "logprob": -0.64453125, "special": false, - "text": "5" + "text": "\n\n" }, { - "id": 236764, - "logprob": -0.040039062, + "id": 2094, + "logprob": -0.77734375, "special": false, - "text": "," + "text": "This" }, { - "id": 236743, - "logprob": -0.26757812, + "id": 563, + "logprob": -0.040283203, "special": false, - "text": " " + "text": " is" }, { - "id": 236825, - "logprob": -0.0047302246, + "id": 496, + "logprob": -0.03125, "special": false, - "text": "6" + "text": " a" }, { - "id": 236764, - "logprob": -0.026123047, + "id": 6290, + "logprob": -0.03515625, "special": false, - "text": "," + "text": " nice" }, { - "id": 236743, - "logprob": -0.265625, + "id": 1977, + "logprob": -0.0020751953, "special": false, - "text": " " + "text": " place" }, { - "id": 236832, - "logprob": -0.014160156, + "id": 236761, + "logprob": -0.0079956055, "special": false, - "text": "7" + "text": "." 
}, { - "id": 236764, - "logprob": -0.013977051, + "id": 107, + "logprob": -0.9921875, "special": false, - "text": "," + "text": "\n" }, { - "id": 236743, - "logprob": -0.103515625, - "special": false, - "text": " " - }, - { - "id": 236828, - "logprob": -0.008178711, - "special": false, - "text": "8" - }, - { - "id": 236764, - "logprob": -0.030151367, - "special": false, - "text": "," - }, - { - "id": 236743, - "logprob": -0.39453125, - "special": false, - "text": " " - }, - { - "id": 236819, - "logprob": -0.008728027, - "special": false, - "text": "9" - }, - { - "id": 236764, - "logprob": -0.020629883, - "special": false, - "text": "," - }, - { - "id": 236743, - "logprob": -0.08154297, - "special": false, - "text": " " + "id": 106, + "logprob": -0.45507812, + "special": true, + "text": "" } ], "top_tokens": null }, - "generated_text": ", 4, 5, 6, 7, 8, 9, " + "generated_text": " the people, and the food.\n\nThis is a nice place.\n" } diff --git a/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3.json b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3.json index 1324555aa..859544c89 100644 --- a/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3.json +++ b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3.json @@ -8,31 +8,31 @@ "tokens": [ { "id": 1331, - "logprob": -0.32421875, + "logprob": -0.34960938, "special": false, "text": " people" }, { "id": 8390, - "logprob": -0.15332031, + "logprob": -0.14746094, "special": false, "text": " died" }, { "id": 528, - "logprob": -1.140625, + "logprob": -1.2265625, "special": false, "text": " in" }, { "id": 506, - "logprob": -0.42578125, + "logprob": -0.47070312, "special": false, "text": " the" }, { "id": 3640, - "logprob": -0.64453125, + "logprob": -0.5859375, "special": false, "text": " United" }, @@ -44,31 +44,31 @@ }, { "id": 236761, - "logprob": -0.37890625, + "logprob": -0.34765625, "special": false, "text": "." 
}, { "id": 108, - "logprob": -0.08300781, + "logprob": -0.0859375, "special": false, "text": "\n\n" }, { "id": 818, - "logprob": -1.1796875, + "logprob": -1.1640625, "special": false, "text": "The" }, { "id": 6816, - "logprob": -1.765625, + "logprob": -1.890625, "special": false, "text": " generally" }, { "id": 10951, - "logprob": -0.14550781, + "logprob": -0.14648438, "special": false, "text": " accepted" }, @@ -86,49 +86,49 @@ }, { "id": 600, - "logprob": -0.65625, + "logprob": -0.65234375, "special": false, "text": " that" }, { "id": 236743, - "logprob": -1.1796875, + "logprob": -1.2109375, "special": false, "text": " " }, { "id": 236825, - "logprob": -0.0009918213, + "logprob": -0.00088119507, "special": false, "text": "6" }, { "id": 236832, - "logprob": -6.532669e-05, + "logprob": -6.580353e-05, "special": false, "text": "7" }, { "id": 236810, - "logprob": -4.863739e-05, + "logprob": -5.2690506e-05, "special": false, "text": "5" }, { "id": 236764, - "logprob": -0.00017929077, + "logprob": -0.0001745224, "special": false, "text": "," }, { "id": 236771, - "logprob": -1.2397766e-05, + "logprob": -1.180172e-05, "special": false, "text": "0" }, { "id": 236771, - "logprob": -2.1457672e-06, + "logprob": -1.7881393e-06, "special": false, "text": "0" }, @@ -140,7 +140,7 @@ }, { "id": 1331, - "logprob": -0.50390625, + "logprob": -0.44921875, "special": false, "text": " people" }, @@ -152,67 +152,67 @@ }, { "id": 528, - "logprob": -0.08496094, + "logprob": -0.084472656, "special": false, "text": " in" }, { "id": 506, - "logprob": -0.0003299713, + "logprob": -0.00034713745, "special": false, "text": " the" }, { "id": 3640, - "logprob": -0.028442383, + "logprob": -0.028564453, "special": false, "text": " United" }, { "id": 4184, - "logprob": -0.00011014938, + "logprob": -0.00012207031, "special": false, "text": " States" }, { "id": 236761, - "logprob": -1.1796875, + "logprob": -1.15625, "special": false, "text": "." }, { "id": 3153, - "logprob": -0.104003906, + "logprob": -0.103027344, "special": false, "text": " However" }, { "id": 236764, - "logprob": -0.009094238, + "logprob": -0.009155273, "special": false, "text": "," }, { "id": 1070, - "logprob": -0.88671875, + "logprob": -0.92578125, "special": false, "text": " some" }, { "id": 61806, - "logprob": -0.84765625, + "logprob": -0.91796875, "special": false, "text": " historians" }, { "id": 4646, - "logprob": -1.34375, + "logprob": -1.3828125, "special": false, "text": " believe" }, { "id": 506, - "logprob": -0.59375, + "logprob": -0.65234375, "special": false, "text": " the" }, @@ -230,7 +230,7 @@ }, { "id": 1451, - "logprob": -0.60546875, + "logprob": -0.66015625, "special": false, "text": " could" }, @@ -242,73 +242,73 @@ }, { "id": 618, - "logprob": -0.61328125, + "logprob": -0.57421875, "special": false, "text": " as" }, { "id": 1494, - "logprob": -0.00033569336, + "logprob": -0.00036239624, "special": false, "text": " high" }, { "id": 618, - "logprob": -0.0001411438, + "logprob": -0.0001335144, "special": false, "text": " as" }, { "id": 236743, - "logprob": -0.001045227, + "logprob": -0.0009689331, "special": false, "text": " " }, { "id": 236770, - "logprob": -0.21289062, + "logprob": -0.26367188, "special": false, "text": "1" }, { "id": 236771, - "logprob": -0.13378906, + "logprob": -0.17773438, "special": false, "text": "0" }, { "id": 3625, - "logprob": -0.0087890625, + "logprob": -0.012084961, "special": false, "text": " million" }, { "id": 236761, - "logprob": -0.2109375, + "logprob": -0.21289062, "special": false, "text": "." 
}, { "id": 108, - "logprob": -0.39453125, + "logprob": -0.37304688, "special": false, "text": "\n\n" }, { "id": 236777, - "logprob": -1.1328125, + "logprob": -1.078125, "special": false, "text": "I" }, { "id": 1006, - "logprob": -1.4140625, + "logprob": -1.3203125, "special": false, "text": " am" }, { "id": 3182, - "logprob": -1.15625, + "logprob": -1.078125, "special": false, "text": " looking" }, @@ -320,13 +320,13 @@ }, { "id": 919, - "logprob": -1.2734375, + "logprob": -1.25, "special": false, "text": " more" }, { "id": 1938, - "logprob": -1.2265625, + "logprob": -1.2421875, "special": false, "text": " information" }, @@ -338,169 +338,169 @@ }, { "id": 672, - "logprob": -0.77734375, + "logprob": -0.73046875, "special": false, "text": " this" }, { "id": 59725, - "logprob": -0.70703125, + "logprob": -0.75, "special": false, "text": " discrepancy" }, { "id": 532, - "logprob": -0.8515625, + "logprob": -0.83984375, "special": false, "text": " and" }, { "id": 506, - "logprob": -0.65625, + "logprob": -0.7109375, "special": false, "text": " the" }, { "id": 5872, - "logprob": -1.15625, + "logprob": -1.2734375, "special": false, "text": " factors" }, { "id": 600, - "logprob": -0.2265625, + "logprob": -0.22851562, "special": false, "text": " that" }, { "id": 19263, - "logprob": -1.125, + "logprob": -1.1640625, "special": false, "text": " contributed" }, { "id": 531, - "logprob": -0.001083374, + "logprob": -0.0010757446, "special": false, "text": " to" }, { "id": 506, - "logprob": -0.2109375, + "logprob": -0.18945312, "special": false, "text": " the" }, { "id": 5777, - "logprob": -1.21875, + "logprob": -1.2734375, "special": false, "text": " wide" }, { "id": 2644, - "logprob": -0.018310547, + "logprob": -0.01940918, "special": false, "text": " range" }, { "id": 529, - "logprob": -0.12988281, + "logprob": -0.14550781, "special": false, "text": " of" }, { "id": 14287, - "logprob": -0.03564453, + "logprob": -0.032470703, "special": false, "text": " estimates" }, { "id": 236761, - "logprob": -0.010314941, + "logprob": -0.010375977, "special": false, "text": "." 
}, { "id": 108, - "logprob": -0.060546875, + "logprob": -0.06591797, "special": false, "text": "\n\n" }, { "id": 8291, - "logprob": -0.734375, + "logprob": -0.8046875, "special": false, "text": "Here" }, { "id": 236789, - "logprob": -0.26367188, + "logprob": -0.23828125, "special": false, "text": "'" }, { "id": 236751, - "logprob": -1.1920929e-06, + "logprob": -1.0728836e-06, "special": false, "text": "s" }, { "id": 496, - "logprob": -0.15527344, + "logprob": -0.17480469, "special": false, "text": " a" }, { "id": 25890, - "logprob": -0.08886719, + "logprob": -0.087402344, "special": false, "text": " breakdown" }, { "id": 529, - "logprob": -0.0020446777, + "logprob": -0.0021209717, "special": false, "text": " of" }, { "id": 506, - "logprob": -0.17871094, + "logprob": -0.19140625, "special": false, "text": " the" }, { "id": 5872, - "logprob": -0.90234375, + "logprob": -1.0078125, "special": false, "text": " factors" }, { "id": 20894, - "logprob": -0.25976562, + "logprob": -0.26367188, "special": false, "text": " contributing" }, { "id": 531, - "logprob": -8.34465e-05, + "logprob": -9.250641e-05, "special": false, "text": " to" }, { "id": 506, - "logprob": -0.008544922, + "logprob": -0.008666992, "special": false, "text": " the" }, { "id": 5777, - "logprob": -0.62109375, + "logprob": -0.6171875, "special": false, "text": " wide" }, { "id": 2644, - "logprob": -0.0023345947, + "logprob": -0.0023956299, "special": false, "text": " range" }, @@ -512,25 +512,25 @@ }, { "id": 14287, - "logprob": -0.011291504, + "logprob": -0.011352539, "special": false, "text": " estimates" }, { "id": 573, - "logprob": -0.29101562, + "logprob": -0.30664062, "special": false, "text": " for" }, { "id": 506, - "logprob": -0.21484375, + "logprob": -0.21386719, "special": false, "text": " the" }, { "id": 236743, - "logprob": -0.2890625, + "logprob": -0.35351562, "special": false, "text": " " }, @@ -566,19 +566,19 @@ }, { "id": 10248, - "logprob": -0.01953125, + "logprob": -0.015258789, "special": false, "text": " pandemic" }, { "id": 4355, - "logprob": -0.78515625, + "logprob": -0.83203125, "special": false, "text": " death" }, { "id": 25363, - "logprob": -6.771088e-05, + "logprob": -7.43866e-05, "special": false, "text": " toll" }, @@ -590,13 +590,13 @@ }, { "id": 506, - "logprob": -7.033348e-06, + "logprob": -6.67572e-06, "special": false, "text": " the" }, { "id": 3640, - "logprob": -0.0067443848, + "logprob": -0.0059509277, "special": false, "text": " United" }, diff --git a/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_jpg.json b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_jpg.json new file mode 100644 index 000000000..ae67e0060 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_jpg.json @@ -0,0 +1,26 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "Okay, let's analyze the image.\n\nThe image is a solid, bright white color. There is nothing else visible within it. \n\nIt's essentially a blank white canvas or a completely white square. 
\n\nIs there anything specific you'd like me to do with this image, such as describe it further or imagine what it might represent?", + "name": null, + "role": "assistant", + "tool_calls": null + }, + "usage": null + } + ], + "created": 1741965894, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 74, + "prompt_tokens": 277, + "total_tokens": 351 + } +} diff --git a/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_png.json b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_png.json new file mode 100644 index 000000000..afbfba30a --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgb_png.json @@ -0,0 +1,26 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "Okay, let's analyze the image. \n\nThe image is entirely white, with a very subtle, faint outline of a stylized, cartoonish figure. It appears to be a simplified depiction of a person, likely a child, with a wide-eyed expression and a small, rounded body. \n\nIt's almost like a minimalist, iconic representation. \n\nDo you want me to try and describe it in more detail or perhaps speculate about the context of the image?", + "name": null, + "role": "assistant", + "tool_calls": null + }, + "usage": null + } + ], + "created": 1741965892, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 98, + "prompt_tokens": 277, + "total_tokens": 375 + } +} diff --git a/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgba.json b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgba.json new file mode 100644 index 000000000..1b97d2615 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_base64_rgba.json @@ -0,0 +1,26 @@ +{ + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "Okay, let's analyze the image. \n\nThe transparent image reveals a stylized depiction of **a human head**. It's a minimalist, geometric representation, showing the basic shapes of the skull, eye sockets, and head outline. \n\nDo you want me to describe any specific element of the image in more detail?", + "name": null, + "role": "assistant", + "tool_calls": null + }, + "usage": null + } + ], + "created": 1741966313, + "id": "", + "model": "google/gemma-3-4b-it", + "object": "chat.completion", + "system_fingerprint": "3.2.1-dev0-native", + "usage": { + "completion_tokens": 67, + "prompt_tokens": 277, + "total_tokens": 344 + } +} diff --git a/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_cow.json b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_cow.json index 6c30ada41..cd786b3ce 100644 --- a/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_cow.json +++ b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_cow.json @@ -5,7 +5,7 @@ "index": 0, "logprobs": null, "message": { - "content": "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. 
The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a humorous and unexpected sight of a cow enjoying a tropical beach!", + "content": "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a quite a humorous and unusual scene – a cow enjoying a day at the beach!", "name": null, "role": "assistant", "tool_calls": null @@ -13,14 +13,14 @@ "usage": null } ], - "created": 1741703756, + "created": 1741964480, "id": "", - "model": "gg-hf-g/gemma-3-4b-it", + "model": "google/gemma-3-4b-it", "object": "chat.completion", - "system_fingerprint": "3.1.2-dev0-native", + "system_fingerprint": "3.2.1-dev0-native", "usage": { - "completion_tokens": 70, - "prompt_tokens": 277, - "total_tokens": 347 + "completion_tokens": 74, + "prompt_tokens": 275, + "total_tokens": 349 } } diff --git a/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_cow_dog.json b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_cow_dog.json index fe67c9954..5ed2c4507 100644 --- a/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_cow_dog.json +++ b/integration-tests/models/__snapshots__/test_flash_gemma3/test_flash_gemma3_image_cow_dog.json @@ -5,7 +5,7 @@ "index": 0, "logprobs": null, "message": { - "content": "Based on the image, the animal is a cow, not a dog! \n\nIt appears to be a **Brazilian cattle breed** known as a **Gir Cow**. They are recognized for their reddish-brown color and distinctive markings.", + "content": "That's a fantastic question! However, the image doesn't show a dog. It shows a **Brown Swiss cow** standing on a beach. \n\nBrown Swiss cows are known for their reddish-brown color and distinctive white markings. \n\nIf you'd like, you can send me another image and I’ll do my best to identify it!", "name": null, "role": "assistant", "tool_calls": null @@ -13,14 +13,14 @@ "usage": null } ], - "created": 1741703753, + "created": 1741964477, "id": "", - "model": "gg-hf-g/gemma-3-4b-it", + "model": "google/gemma-3-4b-it", "object": "chat.completion", - "system_fingerprint": "3.1.2-dev0-native", + "system_fingerprint": "3.2.1-dev0-native", "usage": { - "completion_tokens": 48, - "prompt_tokens": 281, - "total_tokens": 329 + "completion_tokens": 75, + "prompt_tokens": 279, + "total_tokens": 354 } } diff --git a/integration-tests/models/test_flash_gemma3.py b/integration-tests/models/test_flash_gemma3.py index ab812d644..5064f34d5 100644 --- a/integration-tests/models/test_flash_gemma3.py +++ b/integration-tests/models/test_flash_gemma3.py @@ -1,3 +1,7 @@ +import base64 +from io import BytesIO +from PIL import Image + import pytest @@ -49,9 +53,9 @@ async def test_flash_gemma3_image_cow_dog(flash_gemma3, response_snapshot): assert ( response.choices[0].message.content - == "Based on the image, the animal is a cow, not a dog! \n\nIt appears to be a **Brazilian cattle breed** known as a **Gir Cow**. They are recognized for their reddish-brown color and distinctive markings." + == "That's a fantastic question! However, the image doesn't show a dog. It shows a **Brown Swiss cow** standing on a beach. \n\nBrown Swiss cows are known for their reddish-brown color and distinctive white markings. 
\n\nIf you'd like, you can send me another image and I’ll do my best to identify it!" ) - assert response.usage["completion_tokens"] == 48 + assert response.usage["completion_tokens"] == 75 assert response == response_snapshot @@ -72,19 +76,95 @@ async def test_flash_gemma3_image_cow(flash_gemma3, response_snapshot): ) assert ( response.choices[0].message.content - == "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a humorous and unexpected sight of a cow enjoying a tropical beach!" + == "Here's a description of what's shown in the image:\n\nThe image depicts a brown cow standing on a sandy beach. The beach has turquoise water and a distant island visible in the background. The sky is bright blue with some white clouds. \n\nIt's a quite a humorous and unusual scene – a cow enjoying a day at the beach!" ) - assert response.usage["completion_tokens"] == 70 + assert response.usage["completion_tokens"] == 74 assert response == response_snapshot async def test_exceed_window(flash_gemma3, response_snapshot): response = await flash_gemma3.generate( - "This is a nice place. " * 800 + "Now count: 1, 2, 3", + "This is a nice place. " * 800 + "I really enjoy the scenery,", seed=42, max_new_tokens=20, ) - assert response.generated_text == ", 4, 5, 6, 7, 8, 9, " - assert response.details.generated_tokens == 20 + assert ( + response.generated_text + == " the people, and the food.\n\nThis is a nice place.\n" + ) + assert response.details.generated_tokens == 16 + assert response == response_snapshot + + +# Helper function to convert a Pillow image to a base64 data URL +def image_to_data_url(img: Image.Image, fmt: str) -> str: + buffer = BytesIO() + img.save(buffer, format=fmt) + img_data = buffer.getvalue() + b64_str = base64.b64encode(img_data).decode("utf-8") + mime_type = "image/png" if fmt.upper() == "PNG" else "image/jpeg" + return f"data:{mime_type};base64,{b64_str}" + + +async def test_flash_gemma3_image_base64_rgba(flash_gemma3, response_snapshot): + # Create an empty 100x100 PNG image with alpha (transparent background) + img = Image.new("RGBA", (100, 100), (0, 0, 0, 0)) + data_url = image_to_data_url(img, "PNG") + response = await flash_gemma3.chat( + seed=42, + messages=[ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": data_url}}, + { + "type": "text", + "text": "What do you see in this transparent image?", + }, + ], + }, + ], + max_tokens=100, + ) + assert response == response_snapshot + + +async def test_flash_gemma3_image_base64_rgb_png(flash_gemma3, response_snapshot): + # Create an empty 100x100 PNG image without alpha (white background) + img = Image.new("RGB", (100, 100), (255, 255, 255)) + data_url = image_to_data_url(img, "PNG") + response = await flash_gemma3.chat( + seed=42, + messages=[ + { + "role": "user", + "content": [ + {"type": "image_url", "image_url": {"url": data_url}}, + {"type": "text", "text": "What do you see in this plain image?"}, + ], + }, + ], + max_tokens=100, + ) + assert response == response_snapshot + + +async def test_flash_gemma3_image_base64_rgb_jpg(flash_gemma3, response_snapshot): + # Create an empty 100x100 JPEG image (white background) + img = Image.new("RGB", (100, 100), (255, 255, 255)) + data_url = image_to_data_url(img, "JPEG") + response = await flash_gemma3.chat( + seed=42, + messages=[ + { + "role": "user", + "content": 
[ + {"type": "image_url", "image_url": {"url": data_url}}, + {"type": "text", "text": "What do you see in this JPEG image?"}, + ], + }, + ], + max_tokens=100, + ) assert response == response_snapshot diff --git a/integration-tests/pyproject.toml b/integration-tests/pyproject.toml index 07aa43073..abe8cfeeb 100644 --- a/integration-tests/pyproject.toml +++ b/integration-tests/pyproject.toml @@ -15,6 +15,7 @@ dependencies = [ "numpy>=2.0", "openai>=1.65", "huggingface_hub>=0.29", + "pillow>=11.1.0", ] [tool.isort] diff --git a/integration-tests/requirements.txt b/integration-tests/requirements.txt index a85db4a5b..ca2dee938 100644 --- a/integration-tests/requirements.txt +++ b/integration-tests/requirements.txt @@ -1,8 +1,8 @@ # This file was autogenerated by uv via the following command: -# uv pip compile pyproject.toml -o requirements.txt -aiohappyeyeballs==2.4.6 +# uv pip compile pyproject.toml +aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.11.12 +aiohttp==3.11.13 # via text-generation aiosignal==1.3.2 # via aiohttp @@ -12,7 +12,7 @@ anyio==4.8.0 # via # httpx # openai -attrs==25.1.0 +attrs==25.3.0 # via aiohttp certifi==2025.1.31 # via @@ -25,13 +25,13 @@ distro==1.9.0 # via openai docker==7.1.0 # via text-generation-integration-tests (pyproject.toml) -filelock==3.17.0 +filelock==3.18.0 # via huggingface-hub frozenlist==1.5.0 # via # aiohttp # aiosignal -fsspec==2025.2.0 +fsspec==2025.3.0 # via huggingface-hub h11==0.14.0 # via httpcore @@ -39,7 +39,7 @@ httpcore==1.0.7 # via httpx httpx==0.28.1 # via openai -huggingface-hub==0.29.0 +huggingface-hub==0.29.3 # via # text-generation-integration-tests (pyproject.toml) # text-generation @@ -51,7 +51,7 @@ idna==3.10 # yarl iniconfig==2.0.0 # via pytest -jiter==0.8.2 +jiter==0.9.0 # via openai multidict==6.1.0 # via @@ -59,15 +59,17 @@ multidict==6.1.0 # yarl numpy==2.2.3 # via text-generation-integration-tests (pyproject.toml) -openai==1.65.3 +openai==1.66.3 # via text-generation-integration-tests (pyproject.toml) packaging==24.2 # via # huggingface-hub # pytest +pillow==11.1.0 + # via text-generation-integration-tests (pyproject.toml) pluggy==1.5.0 # via pytest -propcache==0.2.1 +propcache==0.3.0 # via # aiohttp # yarl @@ -78,7 +80,7 @@ pydantic==2.10.6 # text-generation pydantic-core==2.27.2 # via pydantic -pytest==8.3.4 +pytest==8.3.5 # via # text-generation-integration-tests (pyproject.toml) # pytest-asyncio @@ -95,7 +97,7 @@ sniffio==1.3.1 # via # anyio # openai -syrupy==4.8.1 +syrupy==4.9.0 # via text-generation-integration-tests (pyproject.toml) text-generation==0.7.0 # via text-generation-integration-tests (pyproject.toml) diff --git a/integration-tests/uv.lock b/integration-tests/uv.lock index 9f3765b87..bad6aa8f2 100644 --- a/integration-tests/uv.lock +++ b/integration-tests/uv.lock @@ -97,6 +97,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, ] +[[package]] +name = "anyio" +version = "4.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "idna" }, + { name = "sniffio" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/73/199a98fc2dae33535d6b8e8e6ec01f8c1d76c9adb096c6b7d64823038cde/anyio-4.8.0.tar.gz", hash = 
"sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a", size = 181126 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/46/eb/e7f063ad1fec6b3178a3cd82d1a3c4de82cccf283fc42746168188e1cdd5/anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a", size = 96041 }, +] + [[package]] name = "async-timeout" version = "5.0.1" @@ -181,6 +196,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 }, +] + [[package]] name = "docker" version = "7.1.0" @@ -276,6 +300,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e2/94/758680531a00d06e471ef649e4ec2ed6bf185356a7f9fbfbb7368a40bd49/fsspec-2025.2.0-py3-none-any.whl", hash = "sha256:9de2ad9ce1f85e1931858535bc882543171d197001a0a5eb2ddc04f1781ab95b", size = 184484 }, ] +[[package]] +name = "h11" +version = "0.14.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/38/3af3d3633a34a3316095b39c8e8fb4853a28a536e55d347bd8d8e9a14b03/h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", size = 100418 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 }, +] + +[[package]] +name = "httpcore" +version = "1.0.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6a/41/d7d0a89eb493922c37d343b607bc1b5da7f5be7e383740b4753ad8943e90/httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c", size = 85196 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/f5/72347bc88306acb359581ac4d52f23c0ef445b57157adedb9aee0cd689d2/httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd", size = 78551 }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = 
"sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, +] + [[package]] name = "huggingface-hub" version = "0.29.0" @@ -312,6 +373,50 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374", size = 5892 }, ] +[[package]] +name = "jiter" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/c2/e4562507f52f0af7036da125bb699602ead37a2332af0788f8e0a3417f36/jiter-0.9.0.tar.gz", hash = "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893", size = 162604 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/82/39f7c9e67b3b0121f02a0b90d433626caa95a565c3d2449fea6bcfa3f5f5/jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad", size = 314540 }, + { url = "https://files.pythonhosted.org/packages/01/07/7bf6022c5a152fca767cf5c086bb41f7c28f70cf33ad259d023b53c0b858/jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea", size = 321065 }, + { url = "https://files.pythonhosted.org/packages/6c/b2/de3f3446ecba7c48f317568e111cc112613da36c7b29a6de45a1df365556/jiter-0.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1339f839b91ae30b37c409bf16ccd3dc453e8b8c3ed4bd1d6a567193651a4a51", size = 341664 }, + { url = "https://files.pythonhosted.org/packages/13/cf/6485a4012af5d407689c91296105fcdb080a3538e0658d2abf679619c72f/jiter-0.9.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ffba79584b3b670fefae66ceb3a28822365d25b7bf811e030609a3d5b876f538", size = 364635 }, + { url = "https://files.pythonhosted.org/packages/0d/f7/4a491c568f005553240b486f8e05c82547340572d5018ef79414b4449327/jiter-0.9.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cfc7d0a8e899089d11f065e289cb5b2daf3d82fbe028f49b20d7b809193958d", size = 406288 }, + { url = "https://files.pythonhosted.org/packages/d3/ca/f4263ecbce7f5e6bded8f52a9f1a66540b270c300b5c9f5353d163f9ac61/jiter-0.9.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e00a1a2bbfaaf237e13c3d1592356eab3e9015d7efd59359ac8b51eb56390a12", size = 397499 }, + { url = "https://files.pythonhosted.org/packages/ac/a2/522039e522a10bac2f2194f50e183a49a360d5f63ebf46f6d890ef8aa3f9/jiter-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1d9870561eb26b11448854dce0ff27a9a27cb616b632468cafc938de25e9e51", size = 352926 }, + { url = "https://files.pythonhosted.org/packages/b1/67/306a5c5abc82f2e32bd47333a1c9799499c1c3a415f8dde19dbf876f00cb/jiter-0.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9872aeff3f21e437651df378cb75aeb7043e5297261222b6441a620218b58708", size = 384506 }, + { url = "https://files.pythonhosted.org/packages/0f/89/c12fe7b65a4fb74f6c0d7b5119576f1f16c79fc2953641f31b288fad8a04/jiter-0.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1fd19112d1049bdd47f17bfbb44a2c0001061312dcf0e72765bfa8abd4aa30e5", size = 520621 }, + { url = "https://files.pythonhosted.org/packages/c4/2b/d57900c5c06e6273fbaa76a19efa74dbc6e70c7427ab421bf0095dfe5d4a/jiter-0.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:6ef5da104664e526836070e4a23b5f68dec1cc673b60bf1edb1bfbe8a55d0678", size = 512613 }, + { url = "https://files.pythonhosted.org/packages/89/05/d8b90bfb21e58097d5a4e0224f2940568366f68488a079ae77d4b2653500/jiter-0.9.0-cp310-cp310-win32.whl", hash = "sha256:cb12e6d65ebbefe5518de819f3eda53b73187b7089040b2d17f5b39001ff31c4", size = 206613 }, + { url = "https://files.pythonhosted.org/packages/2c/1d/5767f23f88e4f885090d74bbd2755518050a63040c0f59aa059947035711/jiter-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:c43ca669493626d8672be3b645dbb406ef25af3f4b6384cfd306da7eb2e70322", size = 208371 }, + { url = "https://files.pythonhosted.org/packages/23/44/e241a043f114299254e44d7e777ead311da400517f179665e59611ab0ee4/jiter-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6c4d99c71508912a7e556d631768dcdef43648a93660670986916b297f1c54af", size = 314654 }, + { url = "https://files.pythonhosted.org/packages/fb/1b/a7e5e42db9fa262baaa9489d8d14ca93f8663e7f164ed5e9acc9f467fc00/jiter-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f60fb8ce7df529812bf6c625635a19d27f30806885139e367af93f6e734ef58", size = 320909 }, + { url = "https://files.pythonhosted.org/packages/60/bf/8ebdfce77bc04b81abf2ea316e9c03b4a866a7d739cf355eae4d6fd9f6fe/jiter-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51c4e1a4f8ea84d98b7b98912aa4290ac3d1eabfde8e3c34541fae30e9d1f08b", size = 341733 }, + { url = "https://files.pythonhosted.org/packages/a8/4e/754ebce77cff9ab34d1d0fa0fe98f5d42590fd33622509a3ba6ec37ff466/jiter-0.9.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f4c677c424dc76684fea3e7285a7a2a7493424bea89ac441045e6a1fb1d7b3b", size = 365097 }, + { url = "https://files.pythonhosted.org/packages/32/2c/6019587e6f5844c612ae18ca892f4cd7b3d8bbf49461ed29e384a0f13d98/jiter-0.9.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2221176dfec87f3470b21e6abca056e6b04ce9bff72315cb0b243ca9e835a4b5", size = 406603 }, + { url = "https://files.pythonhosted.org/packages/da/e9/c9e6546c817ab75a1a7dab6dcc698e62e375e1017113e8e983fccbd56115/jiter-0.9.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3c7adb66f899ffa25e3c92bfcb593391ee1947dbdd6a9a970e0d7e713237d572", size = 396625 }, + { url = "https://files.pythonhosted.org/packages/be/bd/976b458add04271ebb5a255e992bd008546ea04bb4dcadc042a16279b4b4/jiter-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c98d27330fdfb77913c1097a7aab07f38ff2259048949f499c9901700789ac15", size = 351832 }, + { url = "https://files.pythonhosted.org/packages/07/51/fe59e307aaebec9265dbad44d9d4381d030947e47b0f23531579b9a7c2df/jiter-0.9.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:eda3f8cc74df66892b1d06b5d41a71670c22d95a1ca2cbab73654745ce9d0419", size = 384590 }, + { url = "https://files.pythonhosted.org/packages/db/55/5dcd2693794d8e6f4889389ff66ef3be557a77f8aeeca8973a97a7c00557/jiter-0.9.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dd5ab5ddc11418dce28343123644a100f487eaccf1de27a459ab36d6cca31043", size = 520690 }, + { url = "https://files.pythonhosted.org/packages/54/d5/9f51dc90985e9eb251fbbb747ab2b13b26601f16c595a7b8baba964043bd/jiter-0.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:42f8a68a69f047b310319ef8e2f52fdb2e7976fb3313ef27df495cf77bcad965", size = 512649 }, + { url = 
"https://files.pythonhosted.org/packages/a6/e5/4e385945179bcf128fa10ad8dca9053d717cbe09e258110e39045c881fe5/jiter-0.9.0-cp311-cp311-win32.whl", hash = "sha256:a25519efb78a42254d59326ee417d6f5161b06f5da827d94cf521fed961b1ff2", size = 206920 }, + { url = "https://files.pythonhosted.org/packages/4c/47/5e0b94c603d8e54dd1faab439b40b832c277d3b90743e7835879ab663757/jiter-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:923b54afdd697dfd00d368b7ccad008cccfeb1efb4e621f32860c75e9f25edbd", size = 210119 }, + { url = "https://files.pythonhosted.org/packages/af/d7/c55086103d6f29b694ec79156242304adf521577530d9031317ce5338c59/jiter-0.9.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:7b46249cfd6c48da28f89eb0be3f52d6fdb40ab88e2c66804f546674e539ec11", size = 309203 }, + { url = "https://files.pythonhosted.org/packages/b0/01/f775dfee50beb420adfd6baf58d1c4d437de41c9b666ddf127c065e5a488/jiter-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:609cf3c78852f1189894383cf0b0b977665f54cb38788e3e6b941fa6d982c00e", size = 319678 }, + { url = "https://files.pythonhosted.org/packages/ab/b8/09b73a793714726893e5d46d5c534a63709261af3d24444ad07885ce87cb/jiter-0.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d726a3890a54561e55a9c5faea1f7655eda7f105bd165067575ace6e65f80bb2", size = 341816 }, + { url = "https://files.pythonhosted.org/packages/35/6f/b8f89ec5398b2b0d344257138182cc090302854ed63ed9c9051e9c673441/jiter-0.9.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e89dc075c1fef8fa9be219e249f14040270dbc507df4215c324a1839522ea75", size = 364152 }, + { url = "https://files.pythonhosted.org/packages/9b/ca/978cc3183113b8e4484cc7e210a9ad3c6614396e7abd5407ea8aa1458eef/jiter-0.9.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04e8ffa3c353b1bc4134f96f167a2082494351e42888dfcf06e944f2729cbe1d", size = 406991 }, + { url = "https://files.pythonhosted.org/packages/13/3a/72861883e11a36d6aa314b4922125f6ae90bdccc225cd96d24cc78a66385/jiter-0.9.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:203f28a72a05ae0e129b3ed1f75f56bc419d5f91dfacd057519a8bd137b00c42", size = 395824 }, + { url = "https://files.pythonhosted.org/packages/87/67/22728a86ef53589c3720225778f7c5fdb617080e3deaed58b04789418212/jiter-0.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fca1a02ad60ec30bb230f65bc01f611c8608b02d269f998bc29cca8619a919dc", size = 351318 }, + { url = "https://files.pythonhosted.org/packages/69/b9/f39728e2e2007276806d7a6609cda7fac44ffa28ca0d02c49a4f397cc0d9/jiter-0.9.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:237e5cee4d5d2659aaf91bbf8ec45052cc217d9446070699441a91b386ae27dc", size = 384591 }, + { url = "https://files.pythonhosted.org/packages/eb/8f/8a708bc7fd87b8a5d861f1c118a995eccbe6d672fe10c9753e67362d0dd0/jiter-0.9.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:528b6b71745e7326eed73c53d4aa57e2a522242320b6f7d65b9c5af83cf49b6e", size = 520746 }, + { url = "https://files.pythonhosted.org/packages/95/1e/65680c7488bd2365dbd2980adaf63c562d3d41d3faac192ebc7ef5b4ae25/jiter-0.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9f48e86b57bc711eb5acdfd12b6cb580a59cc9a993f6e7dcb6d8b50522dcd50d", size = 512754 }, + { url = "https://files.pythonhosted.org/packages/78/f3/fdc43547a9ee6e93c837685da704fb6da7dba311fc022e2766d5277dfde5/jiter-0.9.0-cp312-cp312-win32.whl", hash = "sha256:699edfde481e191d81f9cf6d2211debbfe4bd92f06410e7637dffb8dd5dfde06", size = 207075 
}, + { url = "https://files.pythonhosted.org/packages/cd/9d/742b289016d155f49028fe1bfbeb935c9bf0ffeefdf77daf4a63a42bb72b/jiter-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:099500d07b43f61d8bd780466d429c45a7b25411b334c60ca875fa775f68ccb0", size = 207999 }, +] + [[package]] name = "multidict" version = "6.1.0" @@ -411,6 +516,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/17/7f/d322a4125405920401450118dbdc52e0384026bd669939484670ce8b2ab9/numpy-2.2.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:783145835458e60fa97afac25d511d00a1eca94d4a8f3ace9fe2043003c678e4", size = 12839607 }, ] +[[package]] +name = "openai" +version = "1.66.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/77/5172104ca1df35ed2ed8fb26dbc787f721c39498fc51d666c4db07756a0c/openai-1.66.3.tar.gz", hash = "sha256:8dde3aebe2d081258d4159c4cb27bdc13b5bb3f7ea2201d9bd940b9a89faf0c9", size = 397244 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/5a/e20182f7b6171642d759c548daa0ba20a1d3ac10d2bd0a13fd75704a9ac3/openai-1.66.3-py3-none-any.whl", hash = "sha256:a427c920f727711877ab17c11b95f1230b27767ba7a01e5b66102945141ceca9", size = 567400 }, +] + [[package]] name = "packaging" version = "24.2" @@ -420,6 +544,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, ] +[[package]] +name = "pillow" +version = "11.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f3/af/c097e544e7bd278333db77933e535098c259609c4eb3b85381109602fb5b/pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20", size = 46742715 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/50/1c/2dcea34ac3d7bc96a1fd1bd0a6e06a57c67167fec2cff8d95d88229a8817/pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8", size = 3229983 }, + { url = "https://files.pythonhosted.org/packages/14/ca/6bec3df25e4c88432681de94a3531cc738bd85dea6c7aa6ab6f81ad8bd11/pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192", size = 3101831 }, + { url = "https://files.pythonhosted.org/packages/d4/2c/668e18e5521e46eb9667b09e501d8e07049eb5bfe39d56be0724a43117e6/pillow-11.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2", size = 4314074 }, + { url = "https://files.pythonhosted.org/packages/02/80/79f99b714f0fc25f6a8499ecfd1f810df12aec170ea1e32a4f75746051ce/pillow-11.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26", size = 4394933 }, + { url = "https://files.pythonhosted.org/packages/81/aa/8d4ad25dc11fd10a2001d5b8a80fdc0e564ac33b293bdfe04ed387e0fd95/pillow-11.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07", size = 4353349 }, + { url = 
"https://files.pythonhosted.org/packages/84/7a/cd0c3eaf4a28cb2a74bdd19129f7726277a7f30c4f8424cd27a62987d864/pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482", size = 4476532 }, + { url = "https://files.pythonhosted.org/packages/8f/8b/a907fdd3ae8f01c7670dfb1499c53c28e217c338b47a813af8d815e7ce97/pillow-11.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e", size = 4279789 }, + { url = "https://files.pythonhosted.org/packages/6f/9a/9f139d9e8cccd661c3efbf6898967a9a337eb2e9be2b454ba0a09533100d/pillow-11.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269", size = 4413131 }, + { url = "https://files.pythonhosted.org/packages/a8/68/0d8d461f42a3f37432203c8e6df94da10ac8081b6d35af1c203bf3111088/pillow-11.1.0-cp310-cp310-win32.whl", hash = "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49", size = 2291213 }, + { url = "https://files.pythonhosted.org/packages/14/81/d0dff759a74ba87715509af9f6cb21fa21d93b02b3316ed43bda83664db9/pillow-11.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a", size = 2625725 }, + { url = "https://files.pythonhosted.org/packages/ce/1f/8d50c096a1d58ef0584ddc37e6f602828515219e9d2428e14ce50f5ecad1/pillow-11.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65", size = 2375213 }, + { url = "https://files.pythonhosted.org/packages/dd/d6/2000bfd8d5414fb70cbbe52c8332f2283ff30ed66a9cde42716c8ecbe22c/pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457", size = 3229968 }, + { url = "https://files.pythonhosted.org/packages/d9/45/3fe487010dd9ce0a06adf9b8ff4f273cc0a44536e234b0fad3532a42c15b/pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35", size = 3101806 }, + { url = "https://files.pythonhosted.org/packages/e3/72/776b3629c47d9d5f1c160113158a7a7ad177688d3a1159cd3b62ded5a33a/pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2", size = 4322283 }, + { url = "https://files.pythonhosted.org/packages/e4/c2/e25199e7e4e71d64eeb869f5b72c7ddec70e0a87926398785ab944d92375/pillow-11.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f189805c8be5ca5add39e6f899e6ce2ed824e65fb45f3c28cb2841911da19070", size = 4402945 }, + { url = "https://files.pythonhosted.org/packages/c1/ed/51d6136c9d5911f78632b1b86c45241c712c5a80ed7fa7f9120a5dff1eba/pillow-11.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:dd0052e9db3474df30433f83a71b9b23bd9e4ef1de13d92df21a52c0303b8ab6", size = 4361228 }, + { url = "https://files.pythonhosted.org/packages/48/a4/fbfe9d5581d7b111b28f1d8c2762dee92e9821bb209af9fa83c940e507a0/pillow-11.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:837060a8599b8f5d402e97197d4924f05a2e0d68756998345c829c33186217b1", size = 4484021 }, + { url = "https://files.pythonhosted.org/packages/39/db/0b3c1a5018117f3c1d4df671fb8e47d08937f27519e8614bbe86153b65a5/pillow-11.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:aa8dd43daa836b9a8128dbe7d923423e5ad86f50a7a14dc688194b7be5c0dea2", size = 4287449 }, + { url = 
"https://files.pythonhosted.org/packages/d9/58/bc128da7fea8c89fc85e09f773c4901e95b5936000e6f303222490c052f3/pillow-11.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0a2f91f8a8b367e7a57c6e91cd25af510168091fb89ec5146003e424e1558a96", size = 4419972 }, + { url = "https://files.pythonhosted.org/packages/5f/bb/58f34379bde9fe197f51841c5bbe8830c28bbb6d3801f16a83b8f2ad37df/pillow-11.1.0-cp311-cp311-win32.whl", hash = "sha256:c12fc111ef090845de2bb15009372175d76ac99969bdf31e2ce9b42e4b8cd88f", size = 2291201 }, + { url = "https://files.pythonhosted.org/packages/3a/c6/fce9255272bcf0c39e15abd2f8fd8429a954cf344469eaceb9d0d1366913/pillow-11.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fbd43429d0d7ed6533b25fc993861b8fd512c42d04514a0dd6337fb3ccf22761", size = 2625686 }, + { url = "https://files.pythonhosted.org/packages/c8/52/8ba066d569d932365509054859f74f2a9abee273edcef5cd75e4bc3e831e/pillow-11.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f7955ecf5609dee9442cbface754f2c6e541d9e6eda87fad7f7a989b0bdb9d71", size = 2375194 }, + { url = "https://files.pythonhosted.org/packages/95/20/9ce6ed62c91c073fcaa23d216e68289e19d95fb8188b9fb7a63d36771db8/pillow-11.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:2062ffb1d36544d42fcaa277b069c88b01bb7298f4efa06731a7fd6cc290b81a", size = 3226818 }, + { url = "https://files.pythonhosted.org/packages/b9/d8/f6004d98579a2596c098d1e30d10b248798cceff82d2b77aa914875bfea1/pillow-11.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a85b653980faad27e88b141348707ceeef8a1186f75ecc600c395dcac19f385b", size = 3101662 }, + { url = "https://files.pythonhosted.org/packages/08/d9/892e705f90051c7a2574d9f24579c9e100c828700d78a63239676f960b74/pillow-11.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9409c080586d1f683df3f184f20e36fb647f2e0bc3988094d4fd8c9f4eb1b3b3", size = 4329317 }, + { url = "https://files.pythonhosted.org/packages/8c/aa/7f29711f26680eab0bcd3ecdd6d23ed6bce180d82e3f6380fb7ae35fcf3b/pillow-11.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fdadc077553621911f27ce206ffcbec7d3f8d7b50e0da39f10997e8e2bb7f6a", size = 4412999 }, + { url = "https://files.pythonhosted.org/packages/c8/c4/8f0fe3b9e0f7196f6d0bbb151f9fba323d72a41da068610c4c960b16632a/pillow-11.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:93a18841d09bcdd774dcdc308e4537e1f867b3dec059c131fde0327899734aa1", size = 4368819 }, + { url = "https://files.pythonhosted.org/packages/38/0d/84200ed6a871ce386ddc82904bfadc0c6b28b0c0ec78176871a4679e40b3/pillow-11.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9aa9aeddeed452b2f616ff5507459e7bab436916ccb10961c4a382cd3e03f47f", size = 4496081 }, + { url = "https://files.pythonhosted.org/packages/84/9c/9bcd66f714d7e25b64118e3952d52841a4babc6d97b6d28e2261c52045d4/pillow-11.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3cdcdb0b896e981678eee140d882b70092dac83ac1cdf6b3a60e2216a73f2b91", size = 4296513 }, + { url = "https://files.pythonhosted.org/packages/db/61/ada2a226e22da011b45f7104c95ebda1b63dcbb0c378ad0f7c2a710f8fd2/pillow-11.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36ba10b9cb413e7c7dfa3e189aba252deee0602c86c309799da5a74009ac7a1c", size = 4431298 }, + { url = "https://files.pythonhosted.org/packages/e7/c4/fc6e86750523f367923522014b821c11ebc5ad402e659d8c9d09b3c9d70c/pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6", size = 2291630 }, + { url = 
"https://files.pythonhosted.org/packages/08/5c/2104299949b9d504baf3f4d35f73dbd14ef31bbd1ddc2c1b66a5b7dfda44/pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf", size = 2626369 }, + { url = "https://files.pythonhosted.org/packages/37/f3/9b18362206b244167c958984b57c7f70a0289bfb59a530dd8af5f699b910/pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5", size = 2375240 }, + { url = "https://files.pythonhosted.org/packages/fa/c5/389961578fb677b8b3244fcd934f720ed25a148b9a5cc81c91bdf59d8588/pillow-11.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90", size = 3198345 }, + { url = "https://files.pythonhosted.org/packages/c4/fa/803c0e50ffee74d4b965229e816af55276eac1d5806712de86f9371858fd/pillow-11.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb", size = 3072938 }, + { url = "https://files.pythonhosted.org/packages/dc/67/2a3a5f8012b5d8c63fe53958ba906c1b1d0482ebed5618057ef4d22f8076/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442", size = 3400049 }, + { url = "https://files.pythonhosted.org/packages/e5/a0/514f0d317446c98c478d1872497eb92e7cde67003fed74f696441e647446/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83", size = 3422431 }, + { url = "https://files.pythonhosted.org/packages/cd/00/20f40a935514037b7d3f87adfc87d2c538430ea625b63b3af8c3f5578e72/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f", size = 3446208 }, + { url = "https://files.pythonhosted.org/packages/28/3c/7de681727963043e093c72e6c3348411b0185eab3263100d4490234ba2f6/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73", size = 3509746 }, + { url = "https://files.pythonhosted.org/packages/41/67/936f9814bdd74b2dfd4822f1f7725ab5d8ff4103919a1664eb4874c58b2f/pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0", size = 2626353 }, +] + [[package]] name = "pluggy" version = "1.5.0" @@ -656,6 +828,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 }, ] +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 }, +] + [[package]] name = "syrupy" version = "4.8.1" @@ -688,7 +869,10 @@ version = "2.0.1" source = { virtual = "." 
} dependencies = [ { name = "docker" }, + { name = "huggingface-hub" }, { name = "numpy" }, + { name = "openai" }, + { name = "pillow" }, { name = "pydantic" }, { name = "pytest" }, { name = "pytest-asyncio" }, @@ -699,7 +883,10 @@ dependencies = [ [package.metadata] requires-dist = [ { name = "docker", specifier = ">=7" }, + { name = "huggingface-hub", specifier = ">=0.29" }, { name = "numpy", specifier = ">=2.0" }, + { name = "openai", specifier = ">=1.65" }, + { name = "pillow", specifier = ">=11.1.0" }, { name = "pydantic", specifier = ">2,<3" }, { name = "pytest", specifier = ">=8.3.0" }, { name = "pytest-asyncio", specifier = ">=0.23.1" }, @@ -741,7 +928,7 @@ name = "tqdm" version = "4.67.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "platform_system == 'Windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 } wheels = [ diff --git a/launcher/src/main.rs b/launcher/src/main.rs index fde1472f0..e3abb843d 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -97,11 +97,10 @@ fn get_config( let filename = if !path.exists() { // Assume it's a hub id - let mut builder = if let Ok(token) = std::env::var("HF_TOKEN") { + let mut builder = ApiBuilder::from_env(); + if let Ok(token) = std::env::var("HF_TOKEN") { // env variable has precedence over on file token. - ApiBuilder::new().with_token(Some(token)) - } else { - ApiBuilder::new() + builder = builder.with_token(Some(token)) }; if let Ok(origin) = env::var("HF_HUB_USER_AGENT_ORIGIN") { builder = builder.with_user_agent("origin", origin.as_str()); diff --git a/router/src/server.rs b/router/src/server.rs index 0346b1f19..45d2b9f3c 100644 --- a/router/src/server.rs +++ b/router/src/server.rs @@ -1522,7 +1522,7 @@ pub async fn run( // Shared API builder initialization let api_builder = || { - let mut builder = ApiBuilder::new().with_progress(false); + let mut builder = ApiBuilder::from_env().with_progress(false); if let Some(token) = authorization_token { builder = builder.with_token(Some(token)); } diff --git a/router/src/validation.rs b/router/src/validation.rs index 87b28eb74..1119347dc 100644 --- a/router/src/validation.rs +++ b/router/src/validation.rs @@ -699,7 +699,7 @@ fn image_tokens( // TODO: prefer using the config to determine the number of features let num_mm_soft_tokens_per_image = 256; format!( - "\n\n{:?}\n\n", + "\n\n{}\n\n", "".repeat(num_mm_soft_tokens_per_image) ) } diff --git a/server/text_generation_server/adapters/lora.py b/server/text_generation_server/adapters/lora.py index cdcfe91b1..782d66e4e 100644 --- a/server/text_generation_server/adapters/lora.py +++ b/server/text_generation_server/adapters/lora.py @@ -205,7 +205,6 @@ class LoraWeights(AdapterWeights): lora_a_list = [None] * nlayers lora_b_list = [None] * nlayers - # import ipdb; ipdb.set_trace() for layer_id in range(nlayers): key = (layer_id, layer_type) if key not in target_to_layer: diff --git a/server/text_generation_server/layers/attention/cuda.py b/server/text_generation_server/layers/attention/cuda.py index 4f25cc192..fb50dda68 100644 --- a/server/text_generation_server/layers/attention/cuda.py +++ b/server/text_generation_server/layers/attention/cuda.py @@ -38,6 +38,7 @@ def paged_attention( *, kv_scales: KVScales, softcap: 
diff --git a/server/text_generation_server/layers/attention/flashinfer.py b/server/text_generation_server/layers/attention/flashinfer.py
index d23451844..9479b6067 100644
--- a/server/text_generation_server/layers/attention/flashinfer.py
+++ b/server/text_generation_server/layers/attention/flashinfer.py
@@ -52,7 +52,6 @@ def use_prefill_with_paged_kv_state(
     page_size: int,
     kv_dtype: torch.dtype,
     q_dtype: torch.dtype,
-    window_left: int,
 ):
     """
     Context manager to set the active flashinfer prefill state to the given
@@ -95,7 +94,6 @@ def use_prefill_with_paged_kv_state(
             kv_data_type=kv_dtype,
             q_data_type=q_dtype,
             page_size=page_size,
-            window_left=-1 if window_left is None else window_left,
         )
         yield
     finally:
@@ -172,7 +170,6 @@ def use_decode_state(
     page_size: int,
     kv_cache_dtype: torch.dtype,
     q_dtype: torch.dtype,
-    window_left: int,
 ):
     """
     Context manager to set the active flashinfer decoding state to the given
@@ -209,7 +206,6 @@ def use_decode_state(
             page_size=page_size,
             data_type=kv_cache_dtype,
             q_data_type=q_dtype,
-            window_left=-1 if window_left is None else window_left,
         )
         yield
     finally:
diff --git a/server/text_generation_server/layers/attention/ipex.py b/server/text_generation_server/layers/attention/ipex.py
index 54422308f..2b89060e9 100644
--- a/server/text_generation_server/layers/attention/ipex.py
+++ b/server/text_generation_server/layers/attention/ipex.py
@@ -78,6 +78,7 @@ def paged_attention(
     *,
     kv_scales: KVScales,
     softcap: Optional[float] = None,
+    window_size_left: Optional[int] = -1,
 ):
     if softcap is not None:
         raise NotImplementedError("softcap is not available in IPEX")
diff --git a/server/text_generation_server/layers/attention/rocm.py b/server/text_generation_server/layers/attention/rocm.py
index 65f3ea414..518e55eee 100644
--- a/server/text_generation_server/layers/attention/rocm.py
+++ b/server/text_generation_server/layers/attention/rocm.py
@@ -59,6 +59,7 @@ def paged_attention(
     *,
     kv_scales: KVScales,
     softcap: Optional[float] = None,
+    window_size_left: Optional[int] = -1,
 ):
     # Adapted from: https://github.com/vllm-project/vllm/blob/f8a1e39fae05ca610be8d5a78be9d40f5274e5fc/vllm/model_executor/layers/attention.py
    # Copyright 2023 The vLLM team. All rights
@@ -82,6 +83,8 @@ def paged_attention(
         max_k = max_s
         import flash_attn_2_cuda
 
+        window_size_right = -1 if window_size_left == -1 else 0
+
         if softcap is None:
             softcap = 0.0
         out = flash_attn_2_cuda.varlen_fwd(
@@ -101,8 +104,8 @@ def paged_attention(
             softmax_scale,
             False,  # zero_tensors
             True,  # causal
-            -1,  # Window_left
-            -1,  # Window right
+            window_size_left,  # Window_left
+            window_size_right,  # Window right
             softcap,
             False,  # return softmax
             None,  # generator
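Both the CUDA flashdecoding branch and this ROCm path derive the right window the same way before handing the pair to `varlen_fwd`. Spelled out as a tiny helper (illustrative only; the `4096` below is an arbitrary example value, not taken from any model config):

```python
def flash_attn_window(window_size_left: int) -> tuple[int, int]:
    """(Window_left, Window_right) pair as passed to flash_attn_2_cuda.varlen_fwd.

    The call sites above always set causal=True, so the right window is
    fully open (-1) only when windowing is disabled, and 0 otherwise:
    a query may never look ahead.
    """
    window_size_right = -1 if window_size_left == -1 else 0
    return window_size_left, window_size_right


assert flash_attn_window(-1) == (-1, -1)     # sliding window disabled
assert flash_attn_window(4096) == (4096, 0)  # windowed, still causal
```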
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index 0fdc009ca..ab830b58b 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -272,12 +272,12 @@ class ModelType(enum.Enum):
     GEMMA3 = {
         "type": "gemma3",
         "name": "Gemma3",
-        "url": "https://huggingface.co/collections/google/gemma-3",
+        "url": "https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d",
     }
     GEMMA3_TEXT = {
         "type": "gemma3_text",
         "name": "Gemma3 Text",
-        "url": "https://huggingface.co/collections/google/gemma-3",
+        "url": "https://huggingface.co/collections/google/gemma-3-release-67c6c6f89c4f76621268bb6d",
     }
     COHERE = {
         "type": "cohere",
diff --git a/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py
index ebf1b80eb..2554bd269 100644
--- a/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_gemma2_modeling.py
@@ -287,6 +287,7 @@ class FlashGemma2Attention(torch.nn.Module):
                 max_s,
                 softcap=self.softcap,
                 kv_scales=self.kv_scales,
+                window_size_left=self.window_size,
             )
 
         return self.o_proj(
diff --git a/server/text_generation_server/models/custom_modeling/flash_gemma3_modeling.py b/server/text_generation_server/models/custom_modeling/flash_gemma3_modeling.py
index 085f57ef1..70fe9a3db 100644
--- a/server/text_generation_server/models/custom_modeling/flash_gemma3_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_gemma3_modeling.py
@@ -281,22 +281,12 @@ class FlashGemma3Attention(torch.nn.Module):
             padded_query = padded_query.transpose(1, 2).contiguous()
             padded_key = padded_key.transpose(1, 2).contiguous()
             padded_value = padded_value.transpose(1, 2).contiguous()
-            zeros_to_add = torch.zeros(
-                padded_key.size(0),
-                self.num_key_value_heads,
-                1,
-                self.head_size,
-                dtype=padded_key.dtype,
-                device=padded_key.device,
-            )
-            key_states = torch.cat([padded_key, zeros_to_add], dim=2)
-            value_states = torch.cat([padded_value, zeros_to_add], dim=2)
 
             # Compute attention
             attn_output = F.scaled_dot_product_attention(
                 padded_query,
-                key_states,
-                value_states,
+                padded_key,
+                padded_value,
                 attn_mask=attention_mask,
                 scale=self.softmax_scale,
                 enable_gqa=self.enable_gqa,
@@ -327,6 +317,7 @@ class FlashGemma3Attention(torch.nn.Module):
                 max_s,
                 softcap=self.softcap,
                 kv_scales=self.kv_scales,
+                window_size_left=self.window_size,
             )
 
         return self.o_proj(
@@ -513,6 +504,7 @@ class FlashGemma3Model(torch.nn.Module):
         max_s: int,
         adapter_data: Optional[torch.Tensor] = None,
         attention_mask: Optional[torch.Tensor] = None,
+        attention_mask_local: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         hidden_states = inputs_embeds
 
@@ -525,25 +517,6 @@ class FlashGemma3Model(torch.nn.Module):
                 position_ids, max_s, hidden_states.dtype
             )
 
-            # apply sliding window mask if needed
-            if layer.self_attn.window_size > 0 and attention_mask is not None:
-                min_dtype = torch.finfo(hidden_states.dtype).min
-                # prefill may be larger than sliding window
-                effective_seq_len = max(
-                    position_ids.shape[0], self.layers[i].self_attn.window_size
-                )
-                sliding_window_mask = torch.tril(
-                    torch.ones_like(attention_mask, dtype=torch.bool),
-                    diagonal=-self.layers[i].self_attn.window_size,
-                )
-                attention_mask = torch.where(
-                    sliding_window_mask, min_dtype, attention_mask
-                )
-                offset = max(0, position_ids.shape[0] - effective_seq_len)
-                attention_mask = attention_mask[
-                    :, :, offset : offset + effective_seq_len
-                ]
-
             hidden_states, residual = layer(
                 hidden_states,
                 residual,
@@ -556,7 +529,11 @@ class FlashGemma3Model(torch.nn.Module):
                 seqlen,
                 max_s,
                 adapter_data,
-                attention_mask,
+                (
+                    attention_mask
+                    if self.layers[i].self_attn.window_size == -1
+                    else attention_mask_local
+                ),
             )
 
         hidden_states, _ = self.norm(hidden_states, residual)
@@ -723,24 +700,6 @@ class Gemma3ForConditionalGeneration(nn.Module):
             config.pad_token_id if config.pad_token_id is not None else -1
         )
 
-    def get_image_token_mask(self, input_ids):
-        device = input_ids.device
-
-        start_token_id = self.config.boi_token_index
-        K = self.config.mm_tokens_per_image
-
-        mask = torch.zeros_like(input_ids, dtype=torch.bool, device=device)
-        start_positions = (input_ids == start_token_id).nonzero(as_tuple=True)[0]
-        mask_indices = start_positions.unsqueeze(1) + torch.arange(
-            1, K + 1, device=device
-        ).unsqueeze(0)
-
-        valid_mask = mask_indices < input_ids.size(0)
-        mask_indices = mask_indices[valid_mask]
-        mask[mask_indices] = True
-
-        return mask
-
     def get_attention_mask(
         self, input_ids, max_s, cu_seqlen_prefill, dtype, image_token_mask
     ):
@@ -751,7 +710,7 @@ class Gemma3ForConditionalGeneration(nn.Module):
 
         batch_size = len(lengths)
         sequence_length = max(lengths)
-        target_length = max_s
+        target_length = sequence_length
         # Create the padding mask from the computed lengths.
         # pad_mask: [batch, sequence_length] where True indicates valid tokens.
         seq_range = torch.arange(sequence_length, device=device).unsqueeze(0)
@@ -847,7 +806,7 @@ class Gemma3ForConditionalGeneration(nn.Module):
 
         #     # Determine the maximum sequence length (after padding) from query.
         #     sequence_length = max(lengths)
-        #     target_length = max_s
+        #     target_length = sequence_length
         #     # Create the padding mask from the computed lengths.
         #     # pad_mask: [batch, sequence_length] where True indicates valid tokens.
@@ -885,6 +844,26 @@ class Gemma3ForConditionalGeneration(nn.Module):
         #     input_ids.device
         # )
 
+        if attention_mask is not None:
+            min_dtype = torch.finfo(inputs_embeds.dtype).min
+            # prefill may be larger than sliding window
+            effective_seq_len = max(
+                position_ids.shape[0], self.config.text_config.sliding_window
+            )
+            sliding_window_mask = torch.tril(
+                torch.ones_like(attention_mask, dtype=torch.bool),
+                diagonal=-self.config.text_config.sliding_window,
+            )
+            attention_mask_local = torch.where(
+                sliding_window_mask, min_dtype, attention_mask
+            )
+            offset = max(0, position_ids.shape[0] - effective_seq_len)
+            attention_mask_local = attention_mask_local[
+                :, :, :, offset : offset + effective_seq_len
+            ]
+        else:
+            attention_mask_local = None
+
         hidden_states = self.text_model.model(
             inputs_embeds=inputs_embeds,
             position_ids=position_ids,
@@ -895,6 +874,7 @@ class Gemma3ForConditionalGeneration(nn.Module):
             seqlen=seqlen,
             max_s=max_s,
             attention_mask=attention_mask,
+            attention_mask_local=attention_mask_local,
         )
 
         if lm_head_indices is not None:
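The net effect of the flash_gemma3_modeling.py hunks: the sliding-window mask is no longer rebuilt inside the per-layer loop (where the old code also overwrote `attention_mask` in place, so every layer after the first local one saw the narrowed mask); it is now derived once and passed alongside the global mask, and each layer simply picks one based on its `window_size`. A self-contained sketch of that one-time derivation, with shapes reduced to `[1, 1, q_len, kv_len]` and `sliding_window` standing in for `config.text_config.sliding_window`:

```python
import torch


def build_local_mask(attention_mask: torch.Tensor, sliding_window: int, q_len: int) -> torch.Tensor:
    """Derive the local-attention variant of a dense additive mask.

    Mirrors the logic added to Gemma3ForConditionalGeneration above:
    keys more than `sliding_window - 1` positions behind the query are
    pushed to -inf, then the kv axis is cropped to the effective window.
    """
    min_dtype = torch.finfo(attention_mask.dtype).min
    # prefill may be larger than the sliding window
    effective_seq_len = max(q_len, sliding_window)
    sliding_window_mask = torch.tril(
        torch.ones_like(attention_mask, dtype=torch.bool),
        diagonal=-sliding_window,
    )
    local = torch.where(sliding_window_mask, min_dtype, attention_mask)
    offset = max(0, q_len - effective_seq_len)
    return local[:, :, :, offset : offset + effective_seq_len]


# toy example: 6 tokens, window of 2 -> each query sees itself and one
# previous token; everything further back is masked out
base = torch.zeros(1, 1, 6, 6)  # additive mask, 0 = visible
base = base.masked_fill(
    torch.triu(torch.ones(6, 6, dtype=torch.bool), 1), torch.finfo(base.dtype).min
)
print(build_local_mask(base, sliding_window=2, q_len=6)[0, 0])
```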
diff --git a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
index 0fa172d03..7ad294f4b 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
@@ -242,6 +242,7 @@ class MistralAttention(torch.nn.Module):
                 seqlen,
                 max_s,
                 kv_scales=self.kv_scales,
+                window_size_left=self.max_past,
             )
 
         return self.o_proj(
diff --git a/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
index a45dd1e61..e2a3e5860 100644
--- a/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py
@@ -290,6 +290,7 @@ class MixtralAttention(torch.nn.Module):
                 seqlen,
                 max_s,
                 kv_scales=self.kv_scales,
+                window_size_left=self.max_past,
             )
 
         return self.o_proj(attn_output.view(-1, self.num_heads * self.head_size))
diff --git a/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py b/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
index 4ea604510..b1f89eff4 100644
--- a/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
@@ -31,7 +31,7 @@ class PaliGemmaForConditionalGeneration(nn.Module):
         super().__init__()
         config.vision_config.quantize = config.quantize
         self.vision_tower = load_vision_model(
-            prefix="vision_model" if not prefix else f"{prefix}.vision_model",
+            prefix="vision_tower" if not prefix else f"{prefix}.vision_tower",
             config=config.vision_config,
             weights=weights,
         )
diff --git a/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
index 9d9562222..f5e4e15ce 100644
--- a/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_qwen2_modeling.py
@@ -74,7 +74,7 @@ class Qwen2Attention(torch.nn.Module):
         weights,
     ):
         super().__init__()
-        self.max_past = (
+        self.window_size = (
             config.sliding_window if config.sliding_window is not None else -1
         )
         self.num_heads = config.num_attention_heads
@@ -172,7 +172,7 @@ class Qwen2Attention(torch.nn.Module):
                 seqlen=seqlen,
                 block_tables=block_tables,
                 softmax_scale=self.softmax_scale,
-                window_size_left=self.max_past,
+                window_size_left=self.window_size,
             )
         # Decode
         else:
@@ -185,6 +185,7 @@ class Qwen2Attention(torch.nn.Module):
                 seqlen,
                 max_s,
                 kv_scales=self.kv_scales,
+                window_size_left=self.window_size,
             )
 
         return self.o_proj(
@@ -405,10 +406,10 @@ class Qwen2ForCausalLM(torch.nn.Module):
             weights=weights,
         )
 
-        self.max_past = config.sliding_window
-        self.max_past_tensor = (
+        self.window_size = config.sliding_window
+        self.window_size_tensor = (
             torch.tensor(config.sliding_window, device=weights.device)
-            if self.max_past is not None
+            if self.window_size is not None
             else None
         )
 
@@ -430,10 +431,10 @@ class Qwen2ForCausalLM(torch.nn.Module):
         if prefill_cache_indices is not None:
             # Slots also need to be sliced as it has the same size as the whole kv tensor
             slots = slots[prefill_cache_indices]
-        elif self.max_past is not None:
+        elif self.window_size is not None:
             # Clamp in decode mode as paged attention requires clamped values whereas the flash attention
             # kernel requires the true values
-            seqlen = seqlen.clamp(max=self.max_past_tensor)
+            seqlen = seqlen.clamp(max=self.window_size_tensor)
 
         inputs_embeds = self.embed_tokens(input_ids)
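The Qwen2 change is mostly a rename (`max_past` to `window_size`), but the clamp it preserves is easy to misread, so here is the regime it encodes, as a schematic helper (hypothetical code, not in the tree; the values in the asserts are arbitrary):

```python
from typing import Optional


def effective_kv_length(true_len: int, window_size: Optional[int], is_prefill: bool) -> int:
    """Prefill hands true lengths to the flash-attention kernel, which
    applies the window itself; decode clamps lengths to the window because
    paged attention must not index further back than the window allows."""
    if is_prefill or window_size is None:
        return true_len
    return min(true_len, window_size)


assert effective_kv_length(10_000, 4096, is_prefill=True) == 10_000
assert effective_kv_length(10_000, 4096, is_prefill=False) == 4096
```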
diff --git a/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py b/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
index 5e090369b..9508cc4f8 100644
--- a/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_starcoder2_modeling.py
@@ -291,6 +291,7 @@ class Starcoder2Attention(torch.nn.Module):
                 seqlen,
                 max_s,
                 kv_scales=self.kv_scales,
+                window_size_left=self.max_past,
             )
 
         return self.o_proj(
diff --git a/server/text_generation_server/models/custom_modeling/gemma3/image_processing_gemma3.py b/server/text_generation_server/models/custom_modeling/gemma3/image_processing_gemma3.py
index 803d81ead..2972abeab 100644
--- a/server/text_generation_server/models/custom_modeling/gemma3/image_processing_gemma3.py
+++ b/server/text_generation_server/models/custom_modeling/gemma3/image_processing_gemma3.py
@@ -263,7 +263,7 @@ class Gemma3ImageProcessor(BaseImageProcessor):
         return_tensors: Optional[Union[str, TensorType]] = None,
         data_format: Optional[ChannelDimension] = ChannelDimension.FIRST,
         input_data_format: Optional[Union[str, ChannelDimension]] = None,
-        do_convert_rgb: bool = None,
+        do_convert_rgb: bool = True,
         do_pan_and_scan: bool = None,
         pan_and_scan_min_crop_size: int = None,
         pan_and_scan_max_num_crops: int = None,
diff --git a/server/text_generation_server/models/custom_modeling/gemma3/processing_gemma3.py b/server/text_generation_server/models/custom_modeling/gemma3/processing_gemma3.py
index 08e39a7c6..6bdf35c63 100644
--- a/server/text_generation_server/models/custom_modeling/gemma3/processing_gemma3.py
+++ b/server/text_generation_server/models/custom_modeling/gemma3/processing_gemma3.py
@@ -82,7 +82,7 @@ class Gemma3Processor(ProcessorMixin):
             do_rescale=False,
             resample=PILImageResampling.BILINEAR,
         )
-        # import ipdb; ipdb.set_trace()
+
         self.image_token_id = tokenizer.image_token_id
         image_tokens_expanded = "".join(
             [tokenizer.image_token] * num_mm_soft_tokens_per_image
@@ -91,8 +91,6 @@ class Gemma3Processor(ProcessorMixin):
             f"\n\n{tokenizer.boi_token}{image_tokens_expanded}{tokenizer.eoi_token}\n\n"
         )
 
-        # import ipdb; ipdb.set_trace()
-
         self.image_processor = image_processor
         self.tokenizer = tokenizer
         self.chat_template = chat_template
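The processor hunk above is the Python-side twin of the router's `image_tokens` string in validation.rs: one image expands to a begin-of-image marker, `num_mm_soft_tokens_per_image` soft tokens, and an end-of-image marker, wrapped in double newlines. A rough reconstruction (the token literals are the usual Gemma3 tokenizer values, assumed here rather than read from the tokenizer config):

```python
def gemma3_image_placeholder(
    boi_token: str = "<start_of_image>",
    eoi_token: str = "<end_of_image>",
    image_token: str = "<image_soft_token>",
    num_mm_soft_tokens_per_image: int = 256,
) -> str:
    """Build the per-image placeholder string inserted into the prompt,
    matching the f-string in Gemma3Processor above."""
    image_tokens_expanded = "".join([image_token] * num_mm_soft_tokens_per_image)
    return f"\n\n{boi_token}{image_tokens_expanded}{eoi_token}\n\n"


placeholder = gemma3_image_placeholder()
assert placeholder.count("<image_soft_token>") == 256
```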
diff --git a/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py b/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py
index e317c5b56..066de6a20 100644
--- a/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py
+++ b/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py
@@ -633,7 +633,7 @@ class Qwen2_5VisionModel(nn.Module):
             config=config,
             weights=weights,
         )
-        # import ipdb; ipdb.set_trace()
+
         self.temporal_patch_size = config.temporal_patch_size
         self.spatial_patch_size = config.spatial_patch_size
         self.in_channels = config.in_channels
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index e268af8b4..d3a83e271 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -83,24 +83,11 @@ from text_generation_server.models.metadata_kernels import (
 
 tracer = trace.get_tracer(__name__)
 
-# Will be set in init
-SLIDING_WINDOW: Optional[int] = None
-
 
 def small_power_of_2(n: int):
     return 1 << ((n - 1).bit_length() - 1)
 
 
-def set_sliding_window(sliding_window: int):
-    global SLIDING_WINDOW
-    SLIDING_WINDOW = sliding_window
-
-
-def get_sliding_windows() -> int:
-    global SLIDING_WINDOW
-    return SLIDING_WINDOW
-
-
 def init_cpu_threads_env(rank_id: int, world_size: int):
     import importlib.util
 
@@ -1002,10 +989,8 @@ class FlashCausalLMBatch(Batch):
             self.slot_indices,
         )
 
-        sliding_window = get_sliding_windows()
         position_ids = []
         slot_indices = []
-        prefill_cache_indices = []
         all_prefill_logprobs = True
         no_prefill_logprobs = True
         prefill_cu_outlens = [0]
@@ -1064,14 +1049,6 @@ class FlashCausalLMBatch(Batch):
             # Update
             cumulative_slot_tokens += len(request_slots)
 
-            # Create tensor to slice into the kv tensor in prefill
-            if sliding_window is not None:
-                request_prefill_cache_indices = torch.arange(
-                    cumulative_length + max(0, input_length - sliding_window),
-                    cumulative_length + input_length,
-                    dtype=torch.int64,
-                )
-
             # Prefill logprobs is ignored if the request is done prefilling
             prefill_logprobs = r.prefill_logprobs and request_prefilling
 
@@ -1085,9 +1062,6 @@ class FlashCausalLMBatch(Batch):
                 prefill_cu_outlens.append(prefill_out_cumulative_length + 1)
                 prefill_out_cumulative_length += 1
 
-            if sliding_window is not None:
-                prefill_cache_indices.append(request_prefill_cache_indices)
-
             ADAPTER_TO_INDEX = get_adapter_to_index()
             if ADAPTER_TO_INDEX:
                 adapter_index = ADAPTER_TO_INDEX.get(r.adapter_id, 0)
@@ -1151,24 +1125,18 @@ class FlashCausalLMBatch(Batch):
             position_ids = torch.cat(position_ids)
             if slot_indices:
                 slot_indices = torch.cat(slot_indices)
-            if sliding_window is not None:
-                prefill_cache_indices = torch.cat(prefill_cache_indices)
         else:
             if position_ids:
                 position_ids = position_ids[0]
             if slot_indices:
                 slot_indices = slot_indices[0]
-            if sliding_window is not None:
-                prefill_cache_indices = prefill_cache_indices[0]
 
         if not has_triton():
             self.position_ids = position_ids.to(device)
             self.slot_indices = slot_indices.to(device)
 
         self.prefill_cu_outlens = prefill_cu_outlens
-        self.prefill_cache_indices = (
-            prefill_cache_indices.to(device) if sliding_window is not None else None
-        )
+        self.prefill_cache_indices = None
 
         if all_prefill_logprobs:
             prefill_head_indices = None
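For context on what was just deleted: `prefill_cache_indices` was how the batch trimmed KV writes to the last `sliding_window` positions of each prefill request. Since the attention kernels now apply the window themselves (and `window_size=None` in models/model.py below disables the block-manager windowing), the full KV cache is kept. For reference, what the removed tensor computed:

```python
import torch


def old_prefill_cache_indices(
    cumulative_length: int, input_length: int, sliding_window: int
) -> torch.Tensor:
    """Reproduces the removed per-request slice: only the last
    `sliding_window` prefill positions were written to the KV cache."""
    return torch.arange(
        cumulative_length + max(0, input_length - sliding_window),
        cumulative_length + input_length,
        dtype=torch.int64,
    )


# a 10-token request starting at offset 0 with a 4-token window kept slots 6..9
print(old_prefill_cache_indices(0, 10, 4))  # tensor([6, 7, 8, 9])
```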
@@ -1306,9 +1274,7 @@ class FlashCausalLM(Model):
         if text_config is not None:
             config = text_config
 
-        if getattr(config, "sliding_window", None) is not None:
-            set_sliding_window(config.sliding_window)
-        else:
+        if getattr(config, "sliding_window", None) is None:
             config.sliding_window = None
 
         self.num_layers = config.num_hidden_layers
@@ -2500,7 +2466,6 @@ class FlashCausalLM(Model):
                 page_size=BLOCK_SIZE,
                 kv_dtype=self.kv_cache_dtype,
                 q_dtype=self.dtype,
-                window_left=self.sliding_window,
             )
         else:
             assert input_lengths_tensor is not None
@@ -2514,5 +2479,4 @@ class FlashCausalLM(Model):
                 page_size=BLOCK_SIZE,
                 kv_cache_dtype=self.kv_cache_dtype,
                 q_dtype=self.dtype,
-                window_left=self.sliding_window,
             )
diff --git a/server/text_generation_server/models/model.py b/server/text_generation_server/models/model.py
index af4d1f082..da317a628 100644
--- a/server/text_generation_server/models/model.py
+++ b/server/text_generation_server/models/model.py
@@ -110,7 +110,7 @@ class Model(ABC):
             requires_padding=self.requires_padding,
             dtype=str(self.dtype),
             device_type=self.device.type,
-            window_size=self.sliding_window,
+            window_size=None,  # Setting this parameter to None disables the sliding-window block logic.
             speculate=self.speculate,
             support_chunking=self.support_chunking,
             use_prefix_caching=PREFIX_CACHING,