From 7faef69015bf385bccf059a0f331df337f76995e Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Thu, 20 Jul 2023 17:47:09 +0000
Subject: [PATCH] Give escape hatch to not use exllama kernels even if available.

---
 server/text_generation_server/utils/layers.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py
index 40bab6ab..4f280161 100644
--- a/server/text_generation_server/utils/layers.py
+++ b/server/text_generation_server/utils/layers.py
@@ -1,3 +1,4 @@
+import os
 import torch
 import torch.distributed
 
@@ -17,6 +18,8 @@ from accelerate import init_empty_weights
 from text_generation_server.utils.gptq.quant_linear import QuantLinear
 
 HAS_EXLLAMA = True
+if os.getenv("DISABLE_EXLLAMA") == "True":
+    HAS_EXLLAMA = False
 try:
     from text_generation_server.utils.gptq.exllama import Ex4bitLinear
 except ImportError:
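
For reference, a minimal sketch of how the escape hatch behaves once the patch is applied. The DISABLE_EXLLAMA name, the exact "True" string comparison, and the module path are taken from the diff above; the verification step itself is only an illustrative assumption, not part of the patch:

    import os

    # The check runs at import time of text_generation_server.utils.layers, so
    # the variable must be set before the module is imported, and it is compared
    # against the literal string "True" (not any truthy value).
    os.environ["DISABLE_EXLLAMA"] = "True"

    from text_generation_server.utils import layers

    # With the flag set, the module-level switch is forced off; GPTQ layers then
    # presumably fall back to the regular QuantLinear path instead of exllama.
    print("exllama enabled:", layers.HAS_EXLLAMA)  # expected: False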