diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py index b6bf0a4b..c37a8c7b 100644 --- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py @@ -178,7 +178,6 @@ class FlashLlamaAttention(torch.nn.Module): False, True, False, - 0, None, ) # Decode diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py index 5ce80be6..0a0bfce7 100644 --- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py @@ -167,7 +167,6 @@ class FlashNeoxAttention(torch.nn.Module): False, True, False, - 0, None, ) # Decode diff --git a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py index 051e0c66..96fa1b8a 100644 --- a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py @@ -201,7 +201,6 @@ class FlashRWAttention(torch.nn.Module): False, True, False, - 0, None, ) # Decode @@ -336,7 +335,6 @@ class FlashRWLargeAttention(torch.nn.Module): False, True, False, - 0, None, ) # Decode diff --git a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py index 925bd23c..165725c1 100644 --- a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py @@ -289,7 +289,6 @@ class FlashMQAttention(torch.nn.Module): False, True, False, - 0, None, ) # Decode