Mirror of https://github.com/huggingface/text-generation-inference.git
Synced 2025-09-11 20:34:54 +00:00
Fix the prefix_caching variable and remove defaults in the server (they were confusing a lot of the time).
This commit is contained in:
parent 594a2b4a3d
commit 5c72f269b6
@@ -5,9 +5,9 @@ from typing import Dict, Optional
 from text_generation_server.utils.log import log_master
-ATTENTION = os.getenv("ATTENTION", "flashinfer")
-default_prefix_caching = "1" if ATTENTION in {"flashinfer", "flashdecoding"} else "0"
-PREFIX_CACHING = os.getenv("PREFIX_CACHING", default_prefix_caching).lower() in {
+ATTENTION = os.environ["ATTENTION"]
+# default_prefix_caching = "1" if ATTENTION in {"flashinfer", "flashdecoding"} else "0"
+PREFIX_CACHING = os.environ["PREFIX_CACHING"].lower() in {
     "1",
     "true",
 }
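The practical effect of the change is easiest to see side by side. The sketch below is illustrative only and not part of the commit: it contrasts the old os.getenv fallback with the new os.environ lookup, which raises KeyError when the caller (e.g. the launcher) has not exported the variable, so a missing or misspelled variable now fails fast instead of silently falling back to a default.

import os

# Before the change: an unset ATTENTION silently became "flashinfer".
attention_before = os.getenv("ATTENTION", "flashinfer")

# After the change: the variable must be exported by the caller,
# otherwise the lookup raises KeyError instead of guessing a default.
try:
    attention_after = os.environ["ATTENTION"]
except KeyError:
    print("ATTENTION is not set: the server now fails instead of guessing")

# PREFIX_CACHING is parsed the same way and then interpreted as a boolean:
# any of "1" or "true" (case-insensitive) enables prefix caching.
prefix_caching = os.environ.get("PREFIX_CACHING", "0").lower() in {"1", "true"}

Note that the example uses os.environ.get only to keep the snippet runnable without the launcher; the server code itself now uses the strict os.environ["PREFIX_CACHING"] lookup shown in the diff.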