Remove prefill chunking support for paged attention

This commit is contained in:
OlivierDehaene 2024-10-11 15:19:14 +02:00
parent df98299919
commit 5e70158b2c
No known key found for this signature in database
GPG Key ID: BB104D67809DA93C

View File

@ -72,6 +72,12 @@ class Model(ABC):
"Prefill chunking will be turned off", "Prefill chunking will be turned off",
) )
support_chunking = False support_chunking = False
if ATTENTION not in ["flashinfer", "flashdecoding"] and support_chunking:
log_master(
logger.warning,
"Prefill chunking is only supported with `flashinfer` or `flashdecoding` attention types.",
)
support_chunking = False
self.support_chunking = support_chunking self.support_chunking = support_chunking
set_support_chunking(support_chunking) set_support_chunking(support_chunking)