diff --git a/server/text_generation_server/models/globals.py b/server/text_generation_server/models/globals.py index fbe35828..61ff6a13 100644 --- a/server/text_generation_server/models/globals.py +++ b/server/text_generation_server/models/globals.py @@ -15,9 +15,9 @@ PREFIX_CACHING = os.environ["PREFIX_CACHING"].lower() in { PREFILL_CHUNKING = os.getenv("PREFILL_CHUNKING", "1").lower() in {"1", "true"} log_master(logger.info, f"Using prefix caching = {PREFIX_CACHING}") _expected = {"paged", "flashdecoding", "flashdecoding-ipex", "flashinfer"} -assert ATTENTION in _expected, ( - f"Attention is not valid {ATTENTION}, expected {_expected}" -) +assert ( + ATTENTION in _expected +), f"Attention is not valid {ATTENTION}, expected {_expected}" log_master(logger.info, f"Using Attention = {ATTENTION}") if PREFIX_CACHING and ATTENTION not in {