Early exit on server too.

This commit is contained in:
Nicolas Patry 2024-08-09 12:47:39 +02:00
parent 6bcad66c6e
commit d84b98b40f
No known key found for this signature in database
GPG Key ID: 64AF4752B2967863
2 changed files with 3 additions and 0 deletions

1
.gitignore vendored
View File

@@ -18,3 +18,4 @@ server/exllama_kernels/exllama_kernels/exllama_ext_hip.cpp
data/
load_tests/*.json
server/fbgemmm

View File

@@ -6,6 +6,8 @@ from typing import Dict, Optional
from text_generation_server.utils.log import log_master

# Attention backend name, read from the ATTENTION environment variable
# ("paged" when the variable is unset).
ATTENTION = os.getenv("ATTENTION", "paged")
_expected = {"paged", "flashdecoding", "flashinfer"}
# Fail fast at import time on an unsupported value. An explicit raise is used
# rather than an `assert` statement because asserts are removed when Python
# runs with -O / PYTHONOPTIMIZE, which would silently disable this check.
# AssertionError is kept so the exception type is unchanged.
if ATTENTION not in _expected:
    raise AssertionError(
        f"Attention is not valid {ATTENTION}, expected {_expected}"
    )
log_master(logger.info, f"Using Attention = {ATTENTION}")
# CUDA graph pool handle; None when CUDA is not available.
MEM_POOL = torch.cuda.graph_pool_handle() if torch.cuda.is_available() else None