mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-23 16:02:10 +00:00)
fix: fix quant linear autotune

parent 28fcdcca6d
commit b3c2d7291e
@@ -88,7 +88,7 @@ class Autotuner(triton.KernelInterface):
             # In testings using only 40 reps seems to be close enough and it appears to be what PyTorch uses
             # PyTorch also sets fast_flush to True, but I didn't see any speedup so I'll leave the default
             return triton.testing.do_bench(
-                kernel_call, percentiles=(0.5, 0.2, 0.8), rep=40
+                kernel_call, quantiles=(0.5, 0.2, 0.8), rep=40
             )
         except triton.OutOfResources:
             return (float("inf"), float("inf"), float("inf"))
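
For context, a minimal runnable sketch of the benchmarking path this change touches. The standalone _bench wrapper and the kernel_call argument are illustrative assumptions, not the repository's exact code; only the triton.testing.do_bench call (with the renamed quantiles keyword) and the triton.OutOfResources fallback mirror the diff above.

    import triton
    import triton.testing


    def _bench(kernel_call):
        # Hypothetical wrapper around the autotuner's benchmarking step.
        # Requires a CUDA device, since do_bench times the call with CUDA events.
        try:
            # 40 reps is close enough in testing and matches what PyTorch uses.
            # Recent Triton versions take `quantiles`; the old keyword was `percentiles`.
            return triton.testing.do_bench(
                kernel_call, quantiles=(0.5, 0.2, 0.8), rep=40
            )
        except triton.OutOfResources:
            # Configurations that do not fit on the device are treated as infinitely slow.
            return (float("inf"), float("inf"), float("inf"))

A candidate config whose launch raises OutOfResources is thus ranked last by the autotuner instead of crashing the tuning loop.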