From 3d71c06affa3fb82210a61cc8036320cfe06f5dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20de=20Kok?= Date: Fri, 11 Apr 2025 12:37:21 +0000 Subject: [PATCH] flashinfer: head_dim -> head_dim_qk --- server/text_generation_server/layers/attention/flashinfer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/text_generation_server/layers/attention/flashinfer.py b/server/text_generation_server/layers/attention/flashinfer.py index 9479b6067..d54e465f5 100644 --- a/server/text_generation_server/layers/attention/flashinfer.py +++ b/server/text_generation_server/layers/attention/flashinfer.py @@ -90,7 +90,7 @@ def use_prefill_with_paged_kv_state( paged_kv_last_page_len=last_page_len, num_qo_heads=num_heads, num_kv_heads=num_kv_heads, - head_dim=head_size, + head_dim_qk=head_size, kv_data_type=kv_dtype, q_data_type=q_dtype, page_size=page_size,