mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-21 23:12:07 +00:00
fix(server): fix cohere (#2249)
This commit is contained in:
parent
e0710ccbeb
commit
118ee57f82
@ -259,8 +259,8 @@ class FlashCohereAttention(torch.nn.Module):
|
||||
cu_seqlen_prefill,
|
||||
kv_cache,
|
||||
block_tables,
|
||||
input_lengths,
|
||||
slots,
|
||||
input_lengths,
|
||||
max_s,
|
||||
):
|
||||
qkv = self.query_key_value(hidden_states)
|
||||
|
Loading…
Reference in New Issue
Block a user