mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 12:24:53 +00:00
Remove unnecessary cuda graph.
This commit is contained in:
parent
de6cb15fa5
commit
2e754ffd2e
@ -802,7 +802,7 @@ class FlashCausalLM(Model):
|
|||||||
try:
|
try:
|
||||||
logger.info("Experimental support for Cuda Graphs is enabled")
|
logger.info("Experimental support for Cuda Graphs is enabled")
|
||||||
# Warmup cuda graphs
|
# Warmup cuda graphs
|
||||||
for bs in [1, 2, 4] + [8 * i for i in range(8)]:
|
for bs in [1, 2, 4] + [8 * i for i in range(1, 9)]:
|
||||||
if self.speculate is None or self.speculate + 1 <= bs:
|
if self.speculate is None or self.speculate + 1 <= bs:
|
||||||
self.cuda_graph_warmup(bs, max_s, max_bt)
|
self.cuda_graph_warmup(bs, max_s, max_bt)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
Loading…
Reference in New Issue
Block a user