Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-10 20:04:52 +00:00)
Commit 14bbd311c1 (parent 1987d37603): Dummy workaround for CPU.
@@ -18,7 +18,10 @@ from accelerate import init_empty_weights

 from text_generation_server.utils.gptq.quant_linear import QuantLinear

-major, _minor = torch.cuda.get_device_capability()
+try:
+    major, _minor = torch.cuda.get_device_capability()
+except Exception:
+    major = 1

 HAS_EXLLAMA = False
 CAN_EXLLAMA = major >= 8
 if os.getenv("DISABLE_EXLLAMA") == "True":
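The change wraps the CUDA capability probe in a try/except: on a CPU-only host, torch.cuda.get_device_capability() raises, so major falls back to the dummy value 1, CAN_EXLLAMA (major >= 8) evaluates to False, and the exllama path is not enabled instead of the import crashing. A minimal standalone sketch of the same guard, assuming only a torch install (the final print is illustrative, not part of the commit):

import torch

# Probe the CUDA compute capability; on CPU-only hosts (or a CPU-only torch
# build) the call raises, so fall back to a dummy value.
try:
    major, _minor = torch.cuda.get_device_capability()
except Exception:
    major = 1  # dummy workaround: compute capability < 8, so exllama stays off

HAS_EXLLAMA = False
CAN_EXLLAMA = major >= 8  # same gating condition as in the diff above

print(f"major={major}, CAN_EXLLAMA={CAN_EXLLAMA}")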