Add transformers GPTQ support (#963)

Proposal to fix https://github.com/huggingface/text-generation-inference/issues/962
2025-09-09 03:14:53 +00:00 · 2023-09-07 10:19:42 +02:00 · 2023-09-07 10:19:42 +02:00 · b03d2621a7
commit b03d2621a7
parent 935a77fb74
1 changed files with 15 additions and 4 deletions
--- a/server/text_generation_server/utils/weights.py
+++ b/server/text_generation_server/utils/weights.py
@@ -223,6 +223,17 @@ class Weights:
        return bits, groupsize
    def _set_gptq_params(self, model_id):
        filename = "config.json"
        try:
            if os.path.exists(os.path.join(model_id, filename)):
                filename = os.path.join(model_id, filename)
            else:
                filename = hf_hub_download(model_id, filename=filename)
            with open(filename, "r") as f:
                data = json.load(f)
            self.gptq_bits = data["quantization_config"]["bits"]
            self.gptq_groupsize = data["quantization_config"]["group_size"]
        except Exception:
            filename = "quantize_config.json"
            try:
                if os.path.exists(os.path.join(model_id, filename)):