From 61ccbf6bbdcb0774cb792618f808af72c120d7dc Mon Sep 17 00:00:00 2001
From: Mohit Sharma
Date: Mon, 28 Apr 2025 13:02:40 +0000
Subject: [PATCH] update paligemma

---
 .../models/custom_modeling/flash_pali_gemma_modeling.py | 4 +---
 server/text_generation_server/models/vlm_causal_lm.py   | 2 --
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py b/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
index ef222c76..0ea3a868 100644
--- a/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/flash_pali_gemma_modeling.py
@@ -77,9 +77,7 @@ class PaliGemmaForConditionalGeneration(nn.Module):
             image_outputs.last_hidden_state
         )
         image_features = self.multi_modal_projector(last_hidden_state)
-        image_features = image_features.view(
-            image_features.shape[0], image_features.shape[1], -1
-        )
+        image_features = image_features.view(-1, image_features.shape[-1])
         return image_features
 
     def get_inputs_embeds(
diff --git a/server/text_generation_server/models/vlm_causal_lm.py b/server/text_generation_server/models/vlm_causal_lm.py
index b7fd88a6..42ba15bf 100644
--- a/server/text_generation_server/models/vlm_causal_lm.py
+++ b/server/text_generation_server/models/vlm_causal_lm.py
@@ -1064,8 +1064,6 @@ class VlmCausalLM(FlashCausalLM):
         )
         if batch.prefill_cache_indices is not None:
             batch.prefill_cache_indices = None
-        if batch.pixel_values is not None:
-            batch.pixel_values = None
         batch.free_encoder_cache()
         return logits, speculative_logits
 
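Note (editorial, not part of the applied patch): the `view` change above flattens the per-image feature maps from a 3-D `(batch, tokens, dim)` layout into a single 2-D `(batch * tokens, dim)` tensor, presumably so the image tokens can be consumed as one flat sequence downstream. A minimal sketch of the before/after shapes, with illustrative dimensions (2 images, 256 patch tokens, 2048-dim projection; these numbers are assumptions, not taken from the patch):

```python
import torch

# Illustrative stand-in for the projector output: 2 images,
# 256 patch tokens each, projected to 2048 dimensions.
image_features = torch.randn(2, 256, 2048)

# Old behavior: keeps the 3-D (batch, tokens, dim) layout unchanged.
old = image_features.view(
    image_features.shape[0], image_features.shape[1], -1
)
assert old.shape == (2, 256, 2048)

# New behavior: merges the batch and token dims into one flat
# sequence of image tokens, preserving only the embedding dim.
new = image_features.view(-1, image_features.shape[-1])
assert new.shape == (2 * 256, 2048)
```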