mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Set the flashdecoding block size to 64 on IPEX
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
d04c86c76c
commit
4ee4ebc03b
@@ -31,10 +31,11 @@ assert TGI_WIGGLE_ROOM < 1
|
||||
|
||||
# This is overridden by the cli
|
||||
BLOCK_SIZE: int
|
||||
if SYSTEM == "ipex":
|
||||
BLOCK_SIZE = 16
|
||||
elif ATTENTION == "flashdecoding":
|
||||
BLOCK_SIZE = 256
|
||||
if ATTENTION == "flashdecoding":
|
||||
if SYSTEM == "ipex":
|
||||
BLOCK_SIZE = 64
|
||||
else:
|
||||
BLOCK_SIZE = 256
|
||||
elif ATTENTION == "flashinfer":
|
||||
BLOCK_SIZE = 1
|
||||
else:
|
||||
|
Loading…
Reference in New Issue
Block a user