Mirror of https://github.com/huggingface/text-generation-inference.git, synced 2025-04-22 15:32:08 +00:00
Disable tensor caching in HPU Graph execution (#4)
This commit is contained in: parent b1897acfd6 · commit e3dcd7f2c2
@@ -632,7 +632,7 @@ class CausalLM(Model):
         model = model.eval().to(device)
         #wrap in hpu_graph only if self.enable_hpu_graph is set
         if self.enable_hpu_graph:
-            model = wrap_in_hpu_graph(model)
+            model = wrap_in_hpu_graph(model, disable_tensor_cache=True)

         if model.config.model_type in MODELS_OPTIMIZED_WITH_STATIC_SHAPES:
             self.is_optimized_for_gaudi = True
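For context, the standalone sketch below shows the pattern the changed line follows. It is a minimal illustration, assuming the Habana PyTorch bridge (habana_frameworks.torch) with HPU Graph support and an available HPU device; the toy Linear model and tensor shapes are placeholders, not part of this commit.

import torch
from habana_frameworks.torch.hpu import wrap_in_hpu_graph

# Placeholder model; the commit itself wraps the CausalLM model loaded above.
model = torch.nn.Linear(16, 16)
device = torch.device("hpu")
model = model.eval().to(device)

# Capture the forward pass as an HPU Graph. disable_tensor_cache=True asks
# the graph not to keep cached tensors alive between replays, reducing the
# steady-state device-memory footprint of the captured graph.
model = wrap_in_hpu_graph(model, disable_tensor_cache=True)

with torch.no_grad():
    out = model(torch.randn(1, 16, device=device))

Dropping the tensor cache trades some replay-time bookkeeping for device-memory headroom, which is plausibly the motivation here, since a serving stack keeps its captured graphs resident for the lifetime of the process.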