mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-10 20:04:52 +00:00
Disable exllama kernels on GPUs with an old CUDA compute capability (major version below 8).
This commit is contained in:
parent
c8bbbd8129
commit
1987d37603
@ -18,13 +18,17 @@ from accelerate import init_empty_weights
|
|||||||
|
|
||||||
from text_generation_server.utils.gptq.quant_linear import QuantLinear
|
from text_generation_server.utils.gptq.quant_linear import QuantLinear
|
||||||
|
|
||||||
HAS_EXLLAMA = True
|
major, _minor = torch.cuda.get_device_capability()
|
||||||
|
HAS_EXLLAMA = False
|
||||||
|
CAN_EXLLAMA = major >= 8
|
||||||
if os.getenv("DISABLE_EXLLAMA") == "True":
|
if os.getenv("DISABLE_EXLLAMA") == "True":
|
||||||
HAS_EXLLAMA = False
|
HAS_EXLLAMA = False
|
||||||
try:
|
elif CAN_EXLLAMA:
|
||||||
from text_generation_server.utils.gptq.exllama import Ex4bitLinear
|
try:
|
||||||
except ImportError:
|
from text_generation_server.utils.gptq.exllama import Ex4bitLinear
|
||||||
HAS_EXLLAMA = False
|
HAS_EXLLAMA = True
|
||||||
|
except ImportError:
|
||||||
|
pass
|
||||||
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
|
@ -170,10 +170,10 @@ class Weights:
|
|||||||
"Cannot load `gptq` weight, make sure the model is already quantized, or quantize it with `text-generation-server quantize ORIGINAL_MODEL_ID NEW_MODEL_ID`"
|
"Cannot load `gptq` weight, make sure the model is already quantized, or quantize it with `text-generation-server quantize ORIGINAL_MODEL_ID NEW_MODEL_ID`"
|
||||||
)
|
)
|
||||||
|
|
||||||
from text_generation_server.utils.layers import HAS_EXLLAMA
|
from text_generation_server.utils.layers import HAS_EXLLAMA, CAN_EXLLAMA
|
||||||
|
|
||||||
if use_exllama:
|
if use_exllama:
|
||||||
if not HAS_EXLLAMA:
|
if not HAS_EXLLAMA and CAN_EXLLAMA:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True"
|
"Exllama GPTQ cuda kernels (which are faster) could have been used, but are not currently installed, try using BUILD_EXTENSIONS=True"
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user