[gaudi] HuggingFaceM4/idefics2-8b issue fix (#3264)

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
Wang, Yi 2025-06-13 18:00:08 +08:00 committed by GitHub
parent e07056ab3f
commit a220e57f45
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 15 additions and 4 deletions

View File

@@ -111,7 +111,7 @@ class MistralAttention(torch.nn.Module):
)
self.num_heads = config.num_attention_heads
self.hidden_size = config.hidden_size
if hasattr(config, "head_dim"):
if getattr(config, "head_dim", None) is not None:
self.head_size = config.head_dim
else:
self.head_size = self.hidden_size // self.num_heads

View File

@@ -1050,8 +1050,6 @@ class FlashVlmCausalLM(FlashCausalLM):
attention_mask=attention_mask_forward,
**kwargs,
)
if batch.prefill_cache_indices is not None:
batch.prefill_cache_indices = None
batch.image_grid_thw = None
batch.free_encoder_cache()
return logits, speculative_logits

View File

@@ -4,8 +4,8 @@ import os
import glob
import time
from optimum.habana.utils import to_gb_rounded
import habana_frameworks.torch as htorch
import numpy as np
START_TS = None
DBG_TRACE_FILENAME = os.environ.get("DBG_TRACE_FILENAME")
@@ -14,6 +14,19 @@ if "GRAPH_VISUALIZATION" in os.environ:
os.remove(f)
def to_gb_rounded(mem: float) -> float:
    """Convert a byte count to gigabytes, rounded to two decimal places.

    Args:
        mem (float): memory in bytes

    Returns:
        float: memory in GB rounded to the second decimal
    """
    # 1024**3 bytes per GiB; np.round keeps the original two-decimal contract.
    gigabytes = mem / (1024 ** 3)
    return np.round(gigabytes, 2)
def count_hpu_graphs():
    """Count dumped HPU pre-graph files under the local .graph_dumps directory."""
    # glob returns [] when the directory does not exist, so this is always >= 0.
    matches = glob.glob(".graph_dumps/*PreGraph*")
    return len(matches)