diff --git a/server/text_generation_server/utils/layers.py b/server/text_generation_server/utils/layers.py index b9b1dfac..01e32588 100644 --- a/server/text_generation_server/utils/layers.py +++ b/server/text_generation_server/utils/layers.py @@ -354,6 +354,8 @@ def get_linear(weight, bias, quantize): "AWQ GEMM kernel can't be used on ROCm systems, please use `--quantize gptq` instead " "to use Exllama/GPTQ kernels for AWQ inference." ) + if not HAS_AWQ: + raise NotImplementedError("You do not seem to have awq installed, either install it (cd server && make install-awq), or try using GPTQ `---quantize gptq` a conversion AWQ->GPTQ will happen on the fly") linear = WQLinear( w_bit=bits, group_size=groupsize,