Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-12 04:44:52 +00:00)
Remove prefill chunking support for the paged attention type
This commit is contained in:
parent df98299919
commit 5e70158b2c
@@ -72,6 +72,12 @@ class Model(ABC):
                 "Prefill chunking will be turned off",
             )
             support_chunking = False
+        if ATTENTION not in ["flashinfer", "flashdecoding"] and support_chunking:
+            log_master(
+                logger.warning,
+                "Prefill chunking is only supported with `flashinfer` or `flashdecoding` attention types.",
+            )
+            support_chunking = False
 
         self.support_chunking = support_chunking
         set_support_chunking(support_chunking)
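For context, a minimal standalone sketch of the guard this diff adds, assuming only the Python standard library: the names resolve_support_chunking and CHUNKING_CAPABLE_ATTENTION are illustrative stand-ins rather than part of text-generation-inference, and the real module's ATTENTION, log_master, and set_support_chunking are replaced here with a plain argument and standard logging.

# Standalone sketch of the chunking guard shown in the diff above (illustrative
# names only; not the actual text-generation-inference module).
import logging

logger = logging.getLogger(__name__)

# Attention backends that support prefill chunking, per the diff above.
CHUNKING_CAPABLE_ATTENTION = ("flashinfer", "flashdecoding")


def resolve_support_chunking(attention: str, support_chunking: bool) -> bool:
    """Turn prefill chunking off when the attention backend cannot handle it."""
    if attention not in CHUNKING_CAPABLE_ATTENTION and support_chunking:
        logger.warning(
            "Prefill chunking is only supported with `flashinfer` or "
            "`flashdecoding` attention types."
        )
        return False
    return support_chunking


if __name__ == "__main__":
    # With the paged attention type, chunking gets disabled.
    print(resolve_support_chunking("paged", True))       # False
    # With flashinfer, the requested setting is kept.
    print(resolve_support_chunking("flashinfer", True))  # True

The net effect of the commit is that self.support_chunking ends up False whenever the attention backend is neither flashinfer nor flashdecoding, regardless of what was requested.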