Fix Qwen VL breakage on Intel platforms

Signed-off-by: Wang, Yi A <yi.a.wang@intel.com>
2025-07-29 03:10:18 +00:00 · 2025-02-09 18:01:20 -08:00 · 2025-02-09 18:01:20 -08:00 · 57385c5463
commit 57385c5463
parent 4b8cda684b
1 changed files with 13 additions and 7 deletions
--- a/server/text_generation_server/layers/rotary.py
+++ b/server/text_generation_server/layers/rotary.py
@@ -577,14 +577,20 @@ class RotaryPositionEmbeddingMultimodalSections(PositionRotaryEmbedding):
        cos: torch.Tensor,
        sin: torch.Tensor,
    ):
        # rotate half the sequence length
        rot = cos.shape[-1] // 2
        q2 = torch.cat([-query[..., rot:], query[..., :rot]], dim=-1)
        k2 = torch.cat([-key[..., rot:], key[..., :rot]], dim=-1)
-        # apply the rotation
+        if SYSTEM == "ipex":
-        rotary_emb.apply_rotary(query, q2, cos, sin, query, q2, True)
+            ipex.llm.functional.rotary_embedding(
-        rotary_emb.apply_rotary(key, k2, cos, sin, key, k2, True)
+                query, key, sin, cos, query.size(-1), True
            )
        else:
            # rotate half the sequence length
            rot = cos.shape[-1] // 2
            q2 = torch.cat([-query[..., rot:], query[..., :rot]], dim=-1)
            k2 = torch.cat([-key[..., rot:], key[..., :rot]], dim=-1)
            # apply the rotation
            rotary_emb.apply_rotary(query, q2, cos, sin, query, q2, True)
            rotary_emb.apply_rotary(key, k2, cos, sin, key, k2, True)
    def _update_cos_sin_cache(
        self, dtype: torch.dtype, device: torch.device, seqlen: int