Set the flashdecoding block size to 64 when SYSTEM is "ipex"

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
Wang, Yi A 2024-12-01 18:55:05 -08:00
parent d04c86c76c
commit 4ee4ebc03b

View File

@@ -31,10 +31,11 @@ assert TGI_WIGGLE_ROOM < 1
# This is overridden by the cli
BLOCK_SIZE: int
if SYSTEM == "ipex":
BLOCK_SIZE = 16
elif ATTENTION == "flashdecoding":
BLOCK_SIZE = 256
if ATTENTION == "flashdecoding":
if SYSTEM == "ipex":
BLOCK_SIZE = 64
else:
BLOCK_SIZE = 256
elif ATTENTION == "flashinfer":
BLOCK_SIZE = 1
else: