After rebase.

2025-09-10 11:54:52 +00:00 · 2023-07-25 09:14:47 +02:00 · 2023-07-25 09:14:47 +02:00 · 0635d0e245
commit 0635d0e245
parent 95583ee257
1 changed file with 2 additions and 5 deletions
--- a/server/text_generation_server/utils/weights.py
+++ b/server/text_generation_server/utils/weights.py
@@ -187,11 +187,8 @@ class Weights:
                    qzeros = self.get_sharded(f"{prefix}.qzeros", dim=0)
                    scales = self.get_sharded(f"{prefix}.scales", dim=0)
                else:
-                    raise RuntimeError(
-                        "Using exllama GPTQ kernel with groupsize<1 is not supported"
-                    )
-                    # qzeros = self.get_tensor(f"{prefix}.qzeros")
-                    # scales = self.get_tensor(f"{prefix}.scales")
+                    qzeros = self.get_tensor(f"{prefix}.qzeros")
+                    scales = self.get_tensor(f"{prefix}.scales")

                # For tp > 1, at this point we know we do not use act-order
                if self.process_group.size() == 1: