mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 11:54:52 +00:00
Add an escape hatch to skip the exllama kernels even when they are available (set DISABLE_EXLLAMA=True).
This commit is contained in:
parent
8cf7c89910
commit
7faef69015
@ -1,3 +1,4 @@
|
||||
import os
|
||||
import torch
|
||||
import torch.distributed
|
||||
|
||||
@ -17,6 +18,8 @@ from accelerate import init_empty_weights
|
||||
|
||||
from text_generation_server.utils.gptq.quant_linear import QuantLinear
|
||||
HAS_EXLLAMA = True
|
||||
if os.getenv("DISABLE_EXLLAMA") == "True":
|
||||
HAS_EXLLAMA=False
|
||||
try:
|
||||
from text_generation_server.utils.gptq.exllama import Ex4bitLinear
|
||||
except ImportError:
|
||||
|
Loading…
Reference in New Issue
Block a user