From 2d4b31070eac80edb33573656e2753f041c685de Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Mon, 17 Jul 2023 17:39:45 +0200 Subject: [PATCH] fix --- .../models/custom_modeling/flash_llama_modeling.py | 1 - .../models/custom_modeling/flash_neox_modeling.py | 1 - .../models/custom_modeling/flash_rw_modeling.py | 2 -- .../models/custom_modeling/flash_santacoder_modeling.py | 1 - 4 files changed, 5 deletions(-) diff --git a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py index b6bf0a4b..c37a8c7b 100644 --- a/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_llama_modeling.py @@ -178,7 +178,6 @@ class FlashLlamaAttention(torch.nn.Module): False, True, False, - 0, None, ) # Decode diff --git a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py index 5ce80be6..0a0bfce7 100644 --- a/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_neox_modeling.py @@ -167,7 +167,6 @@ class FlashNeoxAttention(torch.nn.Module): False, True, False, - 0, None, ) # Decode diff --git a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py index 051e0c66..96fa1b8a 100644 --- a/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_rw_modeling.py @@ -201,7 +201,6 @@ class FlashRWAttention(torch.nn.Module): False, True, False, - 0, None, ) # Decode @@ -336,7 +335,6 @@ class FlashRWLargeAttention(torch.nn.Module): False, True, False, - 0, None, ) # Decode diff --git a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py index 925bd23c..165725c1 100644 --- a/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py +++ b/server/text_generation_server/models/custom_modeling/flash_santacoder_modeling.py @@ -289,7 +289,6 @@ class FlashMQAttention(torch.nn.Module): False, True, False, - 0, None, ) # Decode