diff --git a/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py b/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py index a9cfc065..231d02b5 100644 --- a/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py +++ b/server/text_generation_server/models/custom_modeling/qwen2_5_vl.py @@ -460,7 +460,7 @@ class Qwen2_5VLAttention(nn.Module): # execute flash attention if SYSTEM == "ipex": attn_output = torch.empty_like(query) - if query.device.dtype == "xpu": + if query.device.type == "xpu": ipex.llm.functional.varlen_attention( query.contiguous(), key.contiguous(),