mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-22 15:32:08 +00:00
feat: sort cuda graphs in descending order (#2104)
This commit is contained in:
parent
f0ed8d294f
commit
d930724e82
@ -15,6 +15,13 @@ if cuda_graphs is not None:
|
||||
else:
|
||||
cuda_graphs = None
|
||||
|
||||
|
||||
# Sorting the CUDA graphs in descending order reduces their
|
||||
# memory impact, resulting in lower memory usage.
|
||||
if cuda_graphs is not None:
|
||||
cuda_graphs.sort(reverse=True)
|
||||
|
||||
|
||||
CUDA_GRAPHS = cuda_graphs
|
||||
|
||||
# This is overridden at model loading.
|
||||
|
Loading…
Reference in New Issue
Block a user