mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
chunking by default.
This commit is contained in:
parent
9fab7c6665
commit
db1114955a
@ -12,7 +12,7 @@ PREFIX_CACHING = os.environ["PREFIX_CACHING"].lower() in {
|
|||||||
"1",
|
"1",
|
||||||
"true",
|
"true",
|
||||||
}
|
}
|
||||||
PREFILL_CHUNKING = os.getenv("PREFILL_CHUNKING", "0").lower() in {"1", "true"}
|
PREFILL_CHUNKING = os.getenv("PREFILL_CHUNKING", "1").lower() in {"1", "true"}
|
||||||
log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}")
|
log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}")
|
||||||
_expected = {"paged", "flashdecoding", "flashinfer"}
|
_expected = {"paged", "flashdecoding", "flashinfer"}
|
||||||
assert (
|
assert (
|
||||||
|
Loading…
Reference in New Issue
Block a user