[gaudi] HuggingFaceM4/idefics2-8b issue fix
batch.prefill_cache_indices is reset in generate_token instead of forward, so that position_ids can be updated correctly.

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
parent e07056ab3f
commit 1e56e5fe5c
@@ -111,7 +111,7 @@ class MistralAttention(torch.nn.Module):
         )
         self.num_heads = config.num_attention_heads
         self.hidden_size = config.hidden_size
-        if hasattr(config, "head_dim"):
+        if getattr(config, "head_dim", None) is not None:
             self.head_size = config.head_dim
         else:
             self.head_size = self.hidden_size // self.num_heads
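The one-line change above matters because hasattr only checks that the attribute exists, while some configs define head_dim but leave it set to None. A minimal sketch of the difference, using a hypothetical config stand-in rather than the real transformers config class:

from types import SimpleNamespace

# Hypothetical config: the head_dim attribute exists but is None, as
# some model configs ship it.
config = SimpleNamespace(head_dim=None, hidden_size=4096, num_attention_heads=32)

# Old check: passes even though head_dim is unusable, so head_size
# would have been set to None.
assert hasattr(config, "head_dim")

# New check: falls through to the computed default instead.
if getattr(config, "head_dim", None) is not None:
    head_size = config.head_dim
else:
    head_size = config.hidden_size // config.num_attention_heads

assert head_size == 128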
@@ -1050,8 +1050,6 @@ class FlashVlmCausalLM(FlashCausalLM):
             attention_mask=attention_mask_forward,
             **kwargs,
         )
-        if batch.prefill_cache_indices is not None:
-            batch.prefill_cache_indices = None
         batch.image_grid_thw = None
         batch.free_encoder_cache()
         return logits, speculative_logits
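Per the commit message, the reset removed here moves into generate_token, which runs after the position ids have been updated. A hedged sketch of the resulting control flow, with heavily simplified stand-in bodies (the real FlashVlmCausalLM methods do far more than this):

class FlashVlmCausalLMSketch:
    def forward(self, batch):
        # ... model call elided ...
        logits, speculative_logits = None, None
        # No longer clears batch.prefill_cache_indices here, so the
        # indices remain visible to generate_token below.
        batch.image_grid_thw = None
        batch.free_encoder_cache()
        return logits, speculative_logits

    def generate_token(self, batch):
        logits, speculative_logits = self.forward(batch)
        # Position ids can still be updated from the prefill indices
        # at this point ...
        # ... and only then is the reset performed, once per token step.
        if batch.prefill_cache_indices is not None:
            batch.prefill_cache_indices = None
        return logits, speculative_logits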