diff --git a/integration-tests/requirements.txt b/integration-tests/requirements.txt
index fe4e929f..ca2dee93 100644
--- a/integration-tests/requirements.txt
+++ b/integration-tests/requirements.txt
@@ -39,7 +39,7 @@ httpcore==1.0.7
     # via httpx
 httpx==0.28.1
     # via openai
-huggingface-hub==0.30.1
+huggingface-hub==0.29.3
     # via
     #   text-generation-integration-tests (pyproject.toml)
     #   text-generation
diff --git a/server/text_generation_server/models/custom_modeling/bloom_modeling.py b/server/text_generation_server/models/custom_modeling/bloom_modeling.py
index 7c8a6926..84835ab8 100644
--- a/server/text_generation_server/models/custom_modeling/bloom_modeling.py
+++ b/server/text_generation_server/models/custom_modeling/bloom_modeling.py
@@ -661,7 +661,7 @@ class BloomModel(BloomPreTrainedModel):
 
         return combined_attention_mask
 
-    def set_inputs_embeds(self, new_embeddings: torch.Tensor):
+    def set_input_embeddings(self, new_embeddings: torch.Tensor):
         self.word_embeddings = new_embeddings
 
     def forward(
diff --git a/server/text_generation_server/models/custom_modeling/mllama.py b/server/text_generation_server/models/custom_modeling/mllama.py
index 7d60c098..be0a4b5d 100644
--- a/server/text_generation_server/models/custom_modeling/mllama.py
+++ b/server/text_generation_server/models/custom_modeling/mllama.py
@@ -959,7 +959,6 @@ class MllamaForConditionalGeneration(nn.Module):
         # XXX: Putting these as optional so that the cuda warmup calls can go through.
         cross_attention_states: Optional[torch.Tensor] = None,
         image_indices=None,
-        inputs_embeds=None,
     ):
         if cross_attention_states is not None:
             seqlen_q = len(image_indices)
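
Note (commentary, not part of the patch): the bloom_modeling.py hunk renames set_inputs_embeds to set_input_embeddings, the accessor name used by the Hugging Face Transformers convention (paired with get_input_embeddings), so that generic code which swaps or resizes the embedding table can find the method by its standard name. A minimal sketch of that call pattern follows; ToyModel, vocab_size, and hidden_size are illustrative assumptions standing in for BloomModel, and the torch.Tensor annotation simply mirrors the signature in the patch:

    import torch
    from torch import nn

    class ToyModel(nn.Module):
        """Hypothetical stand-in mirroring BloomModel's embedding accessors."""

        def __init__(self, vocab_size: int = 16, hidden_size: int = 8):
            super().__init__()
            self.word_embeddings = nn.Embedding(vocab_size, hidden_size)

        def get_input_embeddings(self) -> nn.Embedding:
            return self.word_embeddings

        def set_input_embeddings(self, new_embeddings: torch.Tensor):
            # Same signature as the renamed method in the patch: a single
            # entry point for callers that replace the embedding table.
            self.word_embeddings = new_embeddings

    model = ToyModel()
    model.set_input_embeddings(nn.Embedding(32, 8))  # e.g. after a vocab resize
    assert model.get_input_embeddings().num_embeddings == 32

Under the old name, a caller invoking the standard set_input_embeddings hook would raise AttributeError, which is presumably what this hunk fixes.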