Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-12 04:44:52 +00:00)
remove support chunking for paged
This commit is contained in:
parent df98299919
commit 5e70158b2c
@@ -72,6 +72,12 @@ class Model(ABC):
                 "Prefill chunking will be turned off",
             )
             support_chunking = False
+        if ATTENTION not in ["flashinfer", "flashdecoding"] and support_chunking:
+            log_master(
+                logger.warning,
+                "Prefill chunking is only supported with `flashinfer` or `flashdecoding` attention types.",
+            )
+            support_chunking = False
 
         self.support_chunking = support_chunking
         set_support_chunking(support_chunking)
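
For context, the added guard forces prefill chunking off whenever the selected attention backend is neither `flashinfer` nor `flashdecoding` (for example the paged backend), which is what the commit title refers to. Below is a minimal standalone sketch of that check; it is not part of the repository, and `ATTENTION`, `log_master`, and `logger` are plain placeholders standing in for the identifiers used in text_generation_server.

import logging

logger = logging.getLogger(__name__)

# Placeholder for the backend selection normally read from TGI's attention module.
ATTENTION = "paged"


def log_master(log_fn, message):
    # Placeholder for TGI's helper that logs only on the master rank.
    log_fn(message)


def resolve_support_chunking(support_chunking: bool) -> bool:
    # Hypothetical helper mirroring the added guard: prefill chunking is only
    # kept when the backend is flashinfer or flashdecoding; otherwise it is
    # turned off with a warning.
    if ATTENTION not in ["flashinfer", "flashdecoding"] and support_chunking:
        log_master(
            logger.warning,
            "Prefill chunking is only supported with `flashinfer` or "
            "`flashdecoding` attention types.",
        )
        support_chunking = False
    return support_chunking


print(resolve_support_chunking(True))  # prints False while ATTENTION == "paged"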