fix(server): fix cohere (#2249)

This commit is contained in:
OlivierDehaene 2024-07-18 14:00:13 +00:00 committed by erikkaum
parent 1c11084e0b
commit 0f8b19db76

View File

@@ -259,8 +259,8 @@ class FlashCohereAttention(torch.nn.Module):
cu_seqlen_prefill, cu_seqlen_prefill,
kv_cache, kv_cache,
block_tables, block_tables,
input_lengths,
slots, slots,
input_lengths,
max_s, max_s,
): ):
qkv = self.query_key_value(hidden_states) qkv = self.query_key_value(hidden_states)