From b03d2621a79f5bd13cba0029879c8671cdd2a0d0 Mon Sep 17 00:00:00 2001
From: Florian Zimmermeister
Date: Thu, 7 Sep 2023 10:19:42 +0200
Subject: [PATCH] add transformers gptq support (#963)

Proposal to fix https://github.com/huggingface/text-generation-inference/issues/962
---
 .../text_generation_server/utils/weights.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/server/text_generation_server/utils/weights.py b/server/text_generation_server/utils/weights.py
index 71cc1971..9d47a7d3 100644
--- a/server/text_generation_server/utils/weights.py
+++ b/server/text_generation_server/utils/weights.py
@@ -223,7 +223,7 @@ class Weights:
         return bits, groupsize
 
     def _set_gptq_params(self, model_id):
-        filename = "quantize_config.json"
+        filename = "config.json"
         try:
             if os.path.exists(os.path.join(model_id, filename)):
                 filename = os.path.join(model_id, filename)
@@ -231,7 +231,18 @@ class Weights:
                 filename = hf_hub_download(model_id, filename=filename)
             with open(filename, "r") as f:
                 data = json.load(f)
-            self.gptq_bits = data["bits"]
-            self.gptq_groupsize = data["group_size"]
+            self.gptq_bits = data["quantization_config"]["bits"]
+            self.gptq_groupsize = data["quantization_config"]["group_size"]
         except Exception:
-            pass
+            filename = "quantize_config.json"
+            try:
+                if os.path.exists(os.path.join(model_id, filename)):
+                    filename = os.path.join(model_id, filename)
+                else:
+                    filename = hf_hub_download(model_id, filename=filename)
+                with open(filename, "r") as f:
+                    data = json.load(f)
+                self.gptq_bits = data["bits"]
+                self.gptq_groupsize = data["group_size"]
+            except Exception:
+                pass