Dummy workaround for CPU.

Nicolas Patry 2023-09-06 14:35:02 +02:00
parent 1987d37603
commit 14bbd311c1


@@ -18,7 +18,10 @@ from accelerate import init_empty_weights
from text_generation_server.utils.gptq.quant_linear import QuantLinear
try:
    major, _minor = torch.cuda.get_device_capability()
except Exception:
    major = 1
HAS_EXLLAMA = False
CAN_EXLLAMA = major >= 8
if os.getenv("DISABLE_EXLLAMA") == "True":
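
The hunk above wraps the CUDA compute-capability probe in a try/except so the module can still be imported on a CPU-only host, where torch.cuda.get_device_capability() raises; the dummy value major = 1 stays below the exllama threshold, so CAN_EXLLAMA remains False. Below is a minimal standalone sketch of that behaviour, assuming only a plain PyTorch install; the detect_exllama_support helper is hypothetical and collapses the file's separate HAS_EXLLAMA / CAN_EXLLAMA handling into a single boolean for illustration.

import os

import torch


def detect_exllama_support():
    # Guarded probe, mirroring the diff above: on a CPU-only machine
    # torch.cuda.get_device_capability() raises, so fall back to a dummy
    # compute-capability major of 1, which is below the exllama threshold.
    try:
        major, _minor = torch.cuda.get_device_capability()
    except Exception:
        major = 1
    can_exllama = major >= 8  # exllama path requires compute capability >= 8.x
    if os.getenv("DISABLE_EXLLAMA") == "True":
        can_exllama = False  # explicit opt-out via the same env var the diff checks
    return major, can_exllama


if __name__ == "__main__":
    major, can_exllama = detect_exllama_support()
    print(f"compute capability major={major}, exllama eligible={can_exllama}")

On a CPU-only machine this prints major=1 and exllama eligible=False instead of crashing at import time, which is the point of the workaround.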