fix(server): fix cohere (#2249)

This commit is contained in:
OlivierDehaene 2024-07-18 14:00:13 +00:00 committed by yuanwu
parent e0710ccbeb
commit 118ee57f82

View File

@@ -259,8 +259,8 @@ class FlashCohereAttention(torch.nn.Module):
cu_seqlen_prefill,
kv_cache,
block_tables,
-input_lengths,
 slots,
+input_lengths,
max_s,
):
qkv = self.query_key_value(hidden_states)