From 5e70158b2c7bef801933cca487424aa3d174d3a5 Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Fri, 11 Oct 2024 15:19:14 +0200 Subject: [PATCH] remove prefill chunking support for paged attention --- server/text_generation_server/models/model.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/server/text_generation_server/models/model.py b/server/text_generation_server/models/model.py index 05d36ba3..1da6e3e3 100644 --- a/server/text_generation_server/models/model.py +++ b/server/text_generation_server/models/model.py @@ -72,6 +72,12 @@ class Model(ABC): "Prefill chunking will be turned off", ) support_chunking = False + if ATTENTION not in ["flashinfer", "flashdecoding"] and support_chunking: + log_master( + logger.warning, + "Prefill chunking is only supported with `flashinfer` or `flashdecoding` attention types.", + ) + support_chunking = False self.support_chunking = support_chunking set_support_chunking(support_chunking)