From dbb24255c34af02325b7cbb405c172da641fa094 Mon Sep 17 00:00:00 2001
From: "Wang, Yi A"
Date: Thu, 5 Jun 2025 23:14:15 -0700
Subject: [PATCH] fix multi-modality concatenate

Signed-off-by: Wang, Yi A
---
 .../text_generation_server/models/flash_vlm_causal_lm.py     | 4 ++--
 .../server/text_generation_server/models/mllama_causal_lm.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/backends/gaudi/server/text_generation_server/models/flash_vlm_causal_lm.py b/backends/gaudi/server/text_generation_server/models/flash_vlm_causal_lm.py
index e604fd3c..9755ee6d 100644
--- a/backends/gaudi/server/text_generation_server/models/flash_vlm_causal_lm.py
+++ b/backends/gaudi/server/text_generation_server/models/flash_vlm_causal_lm.py
@@ -262,8 +262,8 @@ class FlashVlmCausalLMBatch(FlashCausalLMBatch):

     @classmethod
     @tracer.start_as_current_span("concatenate")
-    def concatenate(cls, batches):
-        batch = super(FlashVlmCausalLMBatch, cls).concatenate(batches)
+    def concatenate(cls, batches, padded_total_bs: int = 0):
+        batch = super(FlashVlmCausalLMBatch, cls).concatenate(batches, padded_total_bs)
         batch.pixel_values = None
         batch.pixel_attention_mask = None
         batch.image_sizes = None
diff --git a/backends/gaudi/server/text_generation_server/models/mllama_causal_lm.py b/backends/gaudi/server/text_generation_server/models/mllama_causal_lm.py
index 771cc0a8..13939974 100644
--- a/backends/gaudi/server/text_generation_server/models/mllama_causal_lm.py
+++ b/backends/gaudi/server/text_generation_server/models/mllama_causal_lm.py
@@ -48,8 +48,8 @@ class FlashMllamaCausalLMBatch(FlashVlmCausalLMBatch):

     @classmethod
     @tracer.start_as_current_span("concatenate")
-    def concatenate(cls, batches):
-        batch = super().concatenate(batches)
+    def concatenate(cls, batches, padded_total_bs: int = 0):
+        batch = super().concatenate(batches, padded_total_bs)
         batch.pixel_values = None
         batch.pixel_attention_mask = None
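
Note on the change: the patch keeps the multi-modality batch classes in sync with the base class by adding the padded_total_bs parameter to their concatenate overrides and forwarding it to the parent call. The sketch below is a minimal, hypothetical illustration of that override/forwarding pattern; the class names and fields (BaseBatch, VlmBatch, requests) are simplified stand-ins, not the real FlashCausalLMBatch hierarchy from text-generation-inference.

from typing import List


class BaseBatch:
    def __init__(self, requests: List[str], padded_total_bs: int = 0):
        self.requests = requests
        self.padded_total_bs = padded_total_bs

    @classmethod
    def concatenate(cls, batches: List["BaseBatch"], padded_total_bs: int = 0) -> "BaseBatch":
        # Merge the request lists and carry the padded batch size through,
        # so callers that pre-pad to a fixed size keep that information.
        merged = [r for b in batches for r in b.requests]
        return cls(merged, padded_total_bs)


class VlmBatch(BaseBatch):
    @classmethod
    def concatenate(cls, batches: List["BaseBatch"], padded_total_bs: int = 0) -> "BaseBatch":
        # Accept and forward the extra argument, mirroring the patch; dropping
        # it here would silently lose the padding size the caller passed in.
        batch = super().concatenate(batches, padded_total_bs)
        # Vision-specific inputs are reset after concatenation, as in the
        # patched code.
        batch.pixel_values = None
        return batch


if __name__ == "__main__":
    a = VlmBatch(["req-1"])
    b = VlmBatch(["req-2", "req-3"])
    merged = VlmBatch.concatenate([a, b], padded_total_bs=4)
    print(merged.requests, merged.padded_total_bs)  # ['req-1', 'req-2', 'req-3'] 4

The design point the patch relies on is that an override must match (or at least accept) the parent's signature: once the base concatenate takes padded_total_bs, any subclass override that still declares only (cls, batches) would break callers that pass the new argument.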