mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-26 12:32:10 +00:00
Fixing linting on main. (#2719)
This commit is contained in:
parent
aadc9cb485
commit
9fde566602
@@ -1729,9 +1729,11 @@ class FlashCausalLM(Model):
|
|||||||
# Slots can be discontiguous when prefix caching is enabled, so we need to expand the slot_indices,
|
# Slots can be discontiguous when prefix caching is enabled, so we need to expand the slot_indices,
|
||||||
# then update the slots with the additional indices to ensure we're grabbing the ones that have been
|
# then update the slots with the additional indices to ensure we're grabbing the ones that have been
|
||||||
# allocated
|
# allocated
|
||||||
slot_indices = (batch.slot_indices.unsqueeze(-1).expand(B, new_length) + arange_int).view(-1)
|
slot_indices = (
|
||||||
|
batch.slot_indices.unsqueeze(-1).expand(B, new_length) + arange_int
|
||||||
|
).view(-1)
|
||||||
slots = batch.slots[slot_indices]
|
slots = batch.slots[slot_indices]
|
||||||
|
|
||||||
input_lengths = (
|
input_lengths = (
|
||||||
input_lengths.unsqueeze(-1).expand(B, new_length) + arange_int
|
input_lengths.unsqueeze(-1).expand(B, new_length) + arange_int
|
||||||
).view(-1)
|
).view(-1)
|
||||||
|
Loading…
Reference in New Issue
Block a user