mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 07:42:06 +00:00
[gaudi] HuggingFaceM4/idefics2-8b issue fix (#3264)
Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
This commit is contained in:
parent
e07056ab3f
commit
a220e57f45
@@ -111,7 +111,7 @@ class MistralAttention(torch.nn.Module):
|
|||||||
)
|
)
|
||||||
self.num_heads = config.num_attention_heads
|
self.num_heads = config.num_attention_heads
|
||||||
self.hidden_size = config.hidden_size
|
self.hidden_size = config.hidden_size
|
||||||
if hasattr(config, "head_dim"):
|
if getattr(config, "head_dim", None) is not None:
|
||||||
self.head_size = config.head_dim
|
self.head_size = config.head_dim
|
||||||
else:
|
else:
|
||||||
self.head_size = self.hidden_size // self.num_heads
|
self.head_size = self.hidden_size // self.num_heads
|
||||||
|
@@ -1050,8 +1050,6 @@ class FlashVlmCausalLM(FlashCausalLM):
|
|||||||
attention_mask=attention_mask_forward,
|
attention_mask=attention_mask_forward,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
)
|
)
|
||||||
if batch.prefill_cache_indices is not None:
|
|
||||||
batch.prefill_cache_indices = None
|
|
||||||
batch.image_grid_thw = None
|
batch.image_grid_thw = None
|
||||||
batch.free_encoder_cache()
|
batch.free_encoder_cache()
|
||||||
return logits, speculative_logits
|
return logits, speculative_logits
|
||||||
|
@@ -4,8 +4,8 @@ import os
|
|||||||
import glob
|
import glob
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from optimum.habana.utils import to_gb_rounded
|
|
||||||
import habana_frameworks.torch as htorch
|
import habana_frameworks.torch as htorch
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
START_TS = None
|
START_TS = None
|
||||||
DBG_TRACE_FILENAME = os.environ.get("DBG_TRACE_FILENAME")
|
DBG_TRACE_FILENAME = os.environ.get("DBG_TRACE_FILENAME")
|
||||||
@@ -14,6 +14,19 @@ if "GRAPH_VISUALIZATION" in os.environ:
|
|||||||
os.remove(f)
|
os.remove(f)
|
||||||
|
|
||||||
|
|
||||||
|
def to_gb_rounded(mem: float) -> float:
|
||||||
|
"""
|
||||||
|
Rounds and converts to GB.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
mem (float): memory in bytes
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
float: memory in GB rounded to the second decimal
|
||||||
|
"""
|
||||||
|
return np.round(mem / 1024**3, 2)
|
||||||
|
|
||||||
|
|
||||||
def count_hpu_graphs():
|
def count_hpu_graphs():
|
||||||
return len(glob.glob(".graph_dumps/*PreGraph*"))
|
return len(glob.glob(".graph_dumps/*PreGraph*"))
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user