Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-10 20:04:52 +00:00)
Commit 14bbd311c1 (parent 1987d37603): Dummy workaround for CPU.
@@ -18,7 +18,10 @@ from accelerate import init_empty_weights

 from text_generation_server.utils.gptq.quant_linear import QuantLinear

-major, _minor = torch.cuda.get_device_capability()
+try:
+    major, _minor = torch.cuda.get_device_capability()
+except Exception:
+    major = 1

 HAS_EXLLAMA = False
 CAN_EXLLAMA = major >= 8
 if os.getenv("DISABLE_EXLLAMA") == "True":
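The change wraps the CUDA capability probe in a try/except: on a CPU-only host, torch.cuda.get_device_capability() raises, so major falls back to the dummy value 1, CAN_EXLLAMA (major >= 8) evaluates to False, and the exllama path is not enabled instead of the import crashing. A minimal standalone sketch of the same guard, assuming only a torch install (the final print is illustrative, not part of the commit):

import torch

# Probe the CUDA compute capability; on CPU-only hosts (or a CPU-only torch
# build) the call raises, so fall back to a dummy value.
try:
    major, _minor = torch.cuda.get_device_capability()
except Exception:
    major = 1  # dummy workaround: compute capability < 8, so exllama stays off

HAS_EXLLAMA = False
CAN_EXLLAMA = major >= 8  # same gating condition as in the diff above

print(f"major={major}, CAN_EXLLAMA={CAN_EXLLAMA}")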