diff --git a/integration-tests/models/test_flash_phi35_moe.py b/integration-tests/models/test_flash_phi35_moe.py
index d3043b02..0cb8f85d 100644
--- a/integration-tests/models/test_flash_phi35_moe.py
+++ b/integration-tests/models/test_flash_phi35_moe.py
@@ -6,6 +6,7 @@ def flash_phi35_moe_handle(launcher):
     with launcher(
         "microsoft/Phi-3.5-MoE-instruct",
         num_shard=4,
+        max_batch_prefill_tokens=10000,
     ) as handle:
         yield handle
 
diff --git a/integration-tests/models/test_mllama.py b/integration-tests/models/test_mllama.py
index 9cece236..f2335690 100644
--- a/integration-tests/models/test_mllama.py
+++ b/integration-tests/models/test_mllama.py
@@ -4,7 +4,10 @@ import asyncio
 
 
 @pytest.fixture(scope="module")
 def mllama_handle(launcher):
-    with launcher("meta-llama/Llama-3.2-11B-Vision-Instruct", num_shard=2) as handle:
+    with launcher(
+        "meta-llama/Llama-3.2-11B-Vision-Instruct",
+        num_shard=2,
+    ) as handle:
         yield handle
 
@@ -75,7 +78,9 @@ async def test_mllama_load(mllama, generate_load, response_snapshot):
                 },
             ],
         )
-        for i in range(4)
+        # TODO: with v3, 4 breaks here. Nothing accounts for the image VRAM
+        # because mllama is the only model doing its own thing.
+        for i in range(2)
     ]
     responses = await asyncio.gather(*futures)
 
diff --git a/launcher/src/main.rs b/launcher/src/main.rs
index 1a3c5c39..9001a4d5 100644
--- a/launcher/src/main.rs
+++ b/launcher/src/main.rs
@@ -201,6 +201,11 @@ struct Config {
 
 impl Config {
     fn flop(&self) -> Option<u64> {
+        if self.vision_config.is_some() {
+            // VLMs are much harder to predict and their VRAM requirements
+            // are more complex.
+            return None;
+        }
         let num_heads = self.num_heads? as u64;
         let num_kv_heads = self.num_kv_heads? as u64;
         let head_dim = self.head_dim? as u64;
diff --git a/router/src/config.rs b/router/src/config.rs
index 9c31e6e8..5d07a293 100644
--- a/router/src/config.rs
+++ b/router/src/config.rs
@@ -191,7 +191,7 @@ pub enum Config {
     #[serde(rename = "phi-msft")]
     PhiMsft,
     Phi3,
-    PhiMoe,
+    Phimoe,
     Llama,
     Baichuan,
     Paligemma(Paligemma),
diff --git a/server/text_generation_server/models/flash_causal_lm.py b/server/text_generation_server/models/flash_causal_lm.py
index 6e941a4e..8989110a 100644
--- a/server/text_generation_server/models/flash_causal_lm.py
+++ b/server/text_generation_server/models/flash_causal_lm.py
@@ -1598,6 +1598,8 @@ class FlashCausalLM(Model):
                 if max_input_tokens is None
                 else max_input_tokens
             )
+        elif max_input_tokens is None:
+            max_input_tokens = max_total_tokens - 1
 
         del _batch, batch
         self.kv_cache = []
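For context on the last hunk, here is a minimal standalone sketch of the fallback semantics being added. The helper name `resolve_max_input_tokens` is hypothetical (it is not the actual `FlashCausalLM` code path); it assumes `max_total_tokens` is already known when no explicit `max_input_tokens` was provided, which matters now that `flop()` returns `None` for vision models and the launcher cannot always derive the limit itself.

```python
from typing import Optional


def resolve_max_input_tokens(
    max_input_tokens: Optional[int],
    max_total_tokens: int,
) -> int:
    """Hypothetical helper mirroring the added `elif` branch: when the user
    did not set an input-token limit, derive it from max_total_tokens while
    leaving room for at least one generated token."""
    if max_input_tokens is None:
        return max_total_tokens - 1
    return max_input_tokens


# Example: with max_total_tokens=4096 and no explicit max_input_tokens,
# prompts would be capped at 4095 tokens; an explicit value is kept as-is.
assert resolve_max_input_tokens(None, 4096) == 4095
assert resolve_max_input_tokens(2048, 4096) == 2048
```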