Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 12:24:53 +00:00).
fix value
This commit is contained in:
parent
15fdd40587
commit
9904f66966
@@ -319,7 +319,7 @@ def launcher(event_loop):
     env = {
         "LOG_LEVEL": "info,text_generation_router=debug",
-        "ENABLE_CUDA_GRAPHS": "True",
+        "ENABLE_CUDA_GRAPHS": "true",
     }
     if not use_flash_attention:
         env["USE_FLASH_ATTENTION"] = "false"
|
@@ -777,7 +777,7 @@ class FlashCausalLM(Model):
             self.device,
         )

-        if os.getenv("ENABLE_CUDA_GRAPHS", "False") == "True":
+        if os.getenv("ENABLE_CUDA_GRAPHS", "false") == "true":
             try:
                 # Warmup cuda graphs for all power of twos until 64
                 for i in range(6):
Loading…
Reference in New Issue
Block a user