Local gptq support.

2025-09-10 11:54:52 +00:00 · 2023-07-31 09:51:58 +02:00 · 2023-07-31 09:51:58 +02:00 · f29e3d7d34
commit f29e3d7d34
parent 3ef5ffbc64
1 changed files with 6 additions and 1 deletions
--- a/server/text_generation_server/utils/weights.py
+++ b/server/text_generation_server/utils/weights.py
@@ -1,3 +1,4 @@
+import os
 from pathlib import Path
 from typing import List, Dict, Optional, Tuple
 from safetensors import safe_open, SafetensorError
@@ -221,8 +222,12 @@ class Weights:
        return bits, groupsize

    def _set_gptq_params(self, model_id):
+        filename = "quantize_config.json"
        try:
-            filename = hf_hub_download(model_id, filename="quantize_config.json")
+            if os.path.exists(os.path.join(model_id, filename)):  # model_id is a local directory that ships the config
+                filename = os.path.join(model_id, filename)
+            else:  # no local copy: treat model_id as a Hub repo id and download the config
+                filename = hf_hub_download(model_id, filename=filename)
            with open(filename, "r") as f:
                data = json.load(f)
            self.gptq_bits = data["bits"]