diff --git a/Dockerfile b/Dockerfile
index 65376b0b..9782f931 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -193,7 +193,8 @@ RUN cd server && \
     pwd && \
     text-generation-server --help

-RUN uv pip install torchvision --no-deps
+# This shouldn't be necessary.
+# RUN uv pip install torchvision --no-deps

 # Copy build artifacts from flash attention builder
 COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages
diff --git a/integration-tests/models/test_transformers_llama4.py b/integration-tests/models/test_transformers_llama4.py
index a73138d1..a20d3284 100644
--- a/integration-tests/models/test_transformers_llama4.py
+++ b/integration-tests/models/test_transformers_llama4.py
@@ -152,4 +152,4 @@ async def test_flash_llama4_image_base64_rgb_jpg(flash_llama4, response_snapshot
         ],
         max_tokens=100,
     )
-    assert response == response_snapshot
\ No newline at end of file
+    assert response == response_snapshot
diff --git a/router/src/config.rs b/router/src/config.rs
index 0074b29a..8188e535 100644
--- a/router/src/config.rs
+++ b/router/src/config.rs
@@ -153,12 +153,9 @@ fn find_supported_resolutions(max_num_chunks: usize, height: usize) -> Vec<(usiz

         for (h, w) in _asp_ratios {
             let divisor = gcd(h, w);
-            let key = (h / divisor, w / divisor);  // reduced aspect ratio as key
+            let key = (h / divisor, w / divisor); // reduced aspect ratio as key

-            if !asp_dict.contains_key(&key) {
-                asp_dict.insert(key, vec![]);
-            }
-            asp_dict.get_mut(&key).unwrap().push((h, w));
+            asp_dict.entry(key).or_default().push((h, w));
         }
     }

@@ -176,7 +173,7 @@ fn find_supported_resolutions(max_num_chunks: usize, height: usize) -> Vec<(usiz
 fn get_best_fit(
     original_height: usize,
     original_width: usize,
-    possible_resolutions: &Vec<(usize, usize)>,
+    possible_resolutions: &[(usize, usize)],
     resize_to_max_canvas: bool,
 ) -> (usize, usize) {
     let orig_h = original_height as f32;
@@ -194,20 +191,13 @@ fn get_best_fit(
     let upscaling_options: Vec<f32> = scales.iter().copied().filter(|&s| s >= 1.0).collect();
     let selected_scale = if !upscaling_options.is_empty() {
         if resize_to_max_canvas {
-            upscaling_options
-                .into_iter()
-                .fold(f32::MIN, f32::max)
+            upscaling_options.into_iter().fold(f32::MIN, f32::max)
         } else {
-            upscaling_options
-                .into_iter()
-                .fold(f32::MAX, f32::min)
+            upscaling_options.into_iter().fold(f32::MAX, f32::min)
         }
     } else {
-        let downscaling_options: Vec<f32> =
-            scales.iter().copied().filter(|&s| s < 1.0).collect();
-        downscaling_options
-            .into_iter()
-            .fold(f32::MIN, f32::max)
+        let downscaling_options: Vec<f32> = scales.iter().copied().filter(|&s| s < 1.0).collect();
+        downscaling_options.into_iter().fold(f32::MIN, f32::max)
     };

     let chosen_canvas: Vec<(usize, usize)> = possible_resolutions
@@ -375,7 +365,6 @@ pub struct Gemma3 {
     vision_config: Gemma3VisionConfig,
 }

-
 #[derive(Clone, Debug, Serialize, Deserialize)]
 #[serde(tag = "model_type")]
 #[serde(rename_all = "snake_case")]
diff --git a/router/src/lib.rs b/router/src/lib.rs
index e2c0f921..50adb5cf 100644
--- a/router/src/lib.rs
+++ b/router/src/lib.rs
@@ -207,7 +207,6 @@ pub struct Llama4Processor {
     do_image_splitting: bool,
 }

-
 #[derive(Debug, Clone, Deserialize, Default)]
 pub struct HubProcessorConfig {
     pub chat_template: Option<ChatTemplateVersions>,
diff --git a/router/src/validation.rs b/router/src/validation.rs
index 3813e358..2d1d9a3d 100644
--- a/router/src/validation.rs
+++ b/router/src/validation.rs
@@ -698,7 +698,8 @@ fn image_tokens(
             let image_height = config.image_size();
             let patch_size = config.patch_size();
             let pixel_shuffle_ratio = config.pixel_shuffle_ratio();
-            let downsample_ratio = (1.0 / (pixel_shuffle_ratio * pixel_shuffle_ratio)).round() as usize;
+            let downsample_ratio =
+                (1.0 / (pixel_shuffle_ratio * pixel_shuffle_ratio)).round() as usize;

             let (ratio_h, ratio_w) = config.get_aspect_ratios(height, width);
             let image_width = image_height; // Assuming pixel shape: [H][W][C]
@@ -726,7 +727,7 @@ fn image_tokens(
             img_string.push_str(IMAGE_END);

             img_string
-        },
+        }
         Qwen2Vl(config) => format!(
             "<|vision_start|>{:?}<|vision_end|>",
             "<|image_pad|>".repeat(config.get_number_of_features(height, width))
@@ -770,8 +771,8 @@ fn prepare_input(
     static RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"!\[\]\([^\)]*\)").unwrap());
     let (tokenizer_query, input_chunks) = match config {
         Some(
-            config @ (Idefics | Mllama | Idefics2(_) | Idefics3(_) | Gemma3(_) | Llama4(_) | Paligemma(_)
-            | LlavaNext(_) | Qwen2Vl(_) | Qwen2_5Vl(_)),
+            config @ (Idefics | Mllama | Idefics2(_) | Idefics3(_) | Gemma3(_) | Llama4(_)
+            | Paligemma(_) | LlavaNext(_) | Qwen2Vl(_) | Qwen2_5Vl(_)),
         ) => {
             let mut input_chunks = Vec::new();
             let mut tokenizer_query = String::with_capacity(inputs.len());
diff --git a/server/text_generation_server/models/transformers_flash_vlm.py b/server/text_generation_server/models/transformers_flash_vlm.py
index ff385017..a7beb68b 100644
--- a/server/text_generation_server/models/transformers_flash_vlm.py
+++ b/server/text_generation_server/models/transformers_flash_vlm.py
@@ -395,7 +395,7 @@ class TransformersFlashVlmCausalLM(VlmCausalLM):
             image_grid_thw=image_grid_thw,
             attention_mask=inputs.get("attention_mask", None),
             use_sdpa=inputs.get("use_sdpa", False),
-            cache_position=inputs.get("cache_position", None)
+            cache_position=inputs.get("cache_position", None),
         ).logits

         logits = self.post_process_outputs(logits, lm_head_indices)
@@ -560,9 +560,7 @@ class TransformersGemma3VlmCausalLM(TransformersFlashVlmCausalLM):

 class TransformersLlama4VlmCausalLM(TransformersFlashVlmCausalLM):
     def pre_process_inputs(self, input_ids, position_ids, cu_seqlen_prefill):
-        inputs = super().pre_process_inputs(
-            input_ids, position_ids, cu_seqlen_prefill
-        )
+        inputs = super().pre_process_inputs(input_ids, position_ids, cu_seqlen_prefill)
         inputs["cache_position"] = position_ids
         inputs["attention_mask"] = torch.zeros((1, 1, 1, 1), device=input_ids.device)
-        return inputs
\ No newline at end of file
+        return inputs