Fix GPTQ autotune data type to be compatible with Torch 2.4.0

2025-09-12 04:44:52 +00:00 · 2024-07-25 09:39:42 +00:00 · 2024-07-25 09:39:42 +00:00 · fa9221f28d
commit fa9221f28d
parent 26614057a7
1 changed file with 1 addition and 1 deletion
--- a/server/text_generation_server/layers/gptq/custom_autotune.py
+++ b/server/text_generation_server/layers/gptq/custom_autotune.py
@@ -91,7 +91,7 @@ class Autotuner(triton.KernelInterface):
                kernel_call, quantiles=(0.5, 0.2, 0.8), rep=40
            )
        except triton.OutOfResources:
-            return (float("inf"), float("inf"), float("inf"))
+            return [float("inf"), float("inf"), float("inf")]

    def run(self, *args, **kwargs):
        self.nargs = dict(zip(self.arg_names, args))