mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-22 15:32:08 +00:00
feat: sort cuda graphs in descending order (#2104)
This commit is contained in:
parent
f0ed8d294f
commit
d930724e82
@@ -15,6 +15,13 @@ if cuda_graphs is not None:
|
|||||||
else:
|
else:
|
||||||
cuda_graphs = None
|
cuda_graphs = None
|
||||||
|
|
||||||
|
|
||||||
|
# Sorting the CUDA graphs in descending order helps reduce
|
||||||
|
# their overall memory usage.
|
||||||
|
if cuda_graphs is not None:
|
||||||
|
cuda_graphs.sort(reverse=True)
|
||||||
|
|
||||||
|
|
||||||
CUDA_GRAPHS = cuda_graphs
|
CUDA_GRAPHS = cuda_graphs
|
||||||
|
|
||||||
# This is overridden at model loading.
|
# This is overridden at model loading.
|
||||||
|
Loading…
Reference in New Issue
Block a user