From 5e70158b2c7bef801933cca487424aa3d174d3a5 Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:19:14 +0200 Subject: [PATCH] remove prefill chunking support for paged attention --- server/text_generation_server/models/model.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/text_generation_server/models/model.py b/server/text_generation_server/models/model.py index 05d36ba3..1da6e3e3 100644 --- a/server/text_generation_server/models/model.py +++ b/server/text_generation_server/models/model.py @@ -72,6 +72,12 @@ class Model(ABC): "Prefill chunking will be turned off", ) support_chunking = False + if ATTENTION not in ["flashinfer", "flashdecoding"] and support_chunking: + log_master( + logger.warning, + "Prefill chunking is only supported with `flashinfer` or `flashdecoding` attention types.", + ) + support_chunking = False self.support_chunking = support_chunking set_support_chunking(support_chunking)