add XPU path in apply_rotary_pos_emb_flashatt for Keye-VL models (vllm-project#7)

faaany · Copilot · web-flow · commit 6dba3b6ec4e4 · 2025-11-06T14:17:57.000+08:00
* add xpu path

Signed-off-by: Lin, Fanli <fanli.lin@intel.com>

* use partial to create a function wrapper

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Signed-off-by: Lin, Fanli <fanli.lin@intel.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py
@@ -345,6 +345,10 @@ def apply_rotary_pos_emb_flashatt(
         from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb
     elif current_platform.is_rocm():
         from flash_attn.ops.triton.rotary import apply_rotary as apply_rotary_emb
+    else:
+        # For XPU and other platforms, use PyTorch fallback
+        from vllm.model_executor.layers.rotary_embedding.common import apply_rotary_emb_torch
+        apply_rotary_emb = partial(apply_rotary_emb_torch, is_neox_style=True)
 
     q_embed = apply_rotary_emb(q.float(), cos.float(), sin.float()).type_as(q)
     k_embed = apply_rotary_emb(k.float(), cos.float(), sin.float()).type_as(k)