mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Fix prefix_caching variable, remove defaults in server (confusing a lot
of the time).
This commit is contained in:
parent
594a2b4a3d
commit
5c72f269b6
@@ -5,9 +5,9 @@ from typing import Dict, Optional
|
|||||||
|
|
||||||
from text_generation_server.utils.log import log_master
|
from text_generation_server.utils.log import log_master
|
||||||
|
|
||||||
ATTENTION = os.getenv("ATTENTION", "flashinfer")
|
ATTENTION = os.environ["ATTENTION"]
|
||||||
default_prefix_caching = "1" if ATTENTION in {"flashinfer", "flashdecoding"} else "0"
|
# default_prefix_caching = "1" if ATTENTION in {"flashinfer", "flashdecoding"} else "0"
|
||||||
PREFIX_CACHING = os.getenv("PREFIX_CACHING", default_prefix_caching).lower() in {
|
PREFIX_CACHING = os.environ["PREFIX_CACHING"].lower() in {
|
||||||
"1",
|
"1",
|
||||||
"true",
|
"true",
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user