mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-24 16:32:12 +00:00
Reducing number of reps while autotuning.
This commit is contained in:
parent
7de104b7f6
commit
fb0840944c
@ -69,7 +69,7 @@ class Autotuner(triton.KernelInterface):
|
|||||||
try:
|
try:
|
||||||
# In testing, using only 40 reps seems to be close enough, and it appears to be what PyTorch uses
|
# In testing, 40 reps seemed to be close enough (and appears to be what PyTorch uses); this is reduced further to 10 reps below
|
||||||
# PyTorch also sets fast_flush to True, but I didn't see any speedup so I'll leave the default
|
# PyTorch also sets fast_flush to True, but I didn't see any speedup so I'll leave the default
|
||||||
return triton.testing.do_bench(kernel_call, percentiles=(0.5, 0.2, 0.8), rep=40)
|
return triton.testing.do_bench(kernel_call, percentiles=(0.5, 0.2, 0.8), rep=10)
|
||||||
except triton.compiler.OutOfResources:
|
except triton.compiler.OutOfResources:
|
||||||
return (float('inf'), float('inf'), float('inf'))
|
return (float('inf'), float('inf'), float('inf'))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user