diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py index 40bab6ab..4f280161 100644 --- a/server/text_generation_server/utils/layers.py +++ b/server/text_generation_server/utils/layers.py @@ -1,3 +1,4 @@ +import os import torch import torch.distributed @@ -17,6 +18,8 @@ from accelerate import init_empty_weights from text_generation_server.utils.gptq.quant_linear import QuantLinear HAS_EXLLAMA = True +if os.getenv("DISABLE_EXLLAMA") == "True": + HAS_EXLLAMA=False try: from text_generation_server.utils.gptq.exllama import Ex4bitLinear except ImportError: