From 326f8e30ac0ad656af954d4705ecc9f7823a4cfc Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Fri, 9 Feb 2024 09:44:53 +0000 Subject: [PATCH] Better error message on non rocm. --- server/text_generation_server/utils/layers.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py index b9b1dfac..01e32588 100644 --- a/server/text_generation_server/utils/layers.py +++ b/server/text_generation_server/utils/layers.py @@ -354,6 +354,8 @@ def get_linear(weight, bias, quantize): "AWQ GEMM kernel can't be used on ROCm systems, please use `--quantize gptq` instead " "to use Exllama/GPTQ kernels for AWQ inference." ) + if not HAS_AWQ: + raise NotImplementedError("You do not seem to have awq installed, either install it (cd server && make install-awq), or try using GPTQ `--quantize gptq` a conversion AWQ->GPTQ will happen on the fly") linear = WQLinear( w_bit=bits, group_size=groupsize,