From 89ff4e901a6df130a81f25d6fe831a8148e58634 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Mon, 15 May 2023 12:33:21 +0200
Subject: [PATCH] Removing dead variables.

---
 server/text_generation_server/models/bloom.py            | 1 -
 server/text_generation_server/models/flash_llama.py      | 3 ---
 server/text_generation_server/models/flash_neox.py       | 4 +---
 server/text_generation_server/models/flash_santacoder.py | 3 ---
 server/text_generation_server/models/galactica.py        | 1 -
 server/text_generation_server/models/gpt_neox.py         | 1 -
 server/text_generation_server/models/opt.py              | 1 -
 server/text_generation_server/models/t5.py               | 1 -
 8 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/server/text_generation_server/models/bloom.py b/server/text_generation_server/models/bloom.py
index f6a69031..ed959291 100644
--- a/server/text_generation_server/models/bloom.py
+++ b/server/text_generation_server/models/bloom.py
@@ -72,7 +72,6 @@ class BLOOMSharded(BLOOM):
         quantize: Optional[str] = None,
     ):
         self.process_group, rank, world_size = initialize_torch_distributed()
-        self.master = rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16
diff --git a/server/text_generation_server/models/flash_llama.py b/server/text_generation_server/models/flash_llama.py
index 0b63f904..aa0b4483 100644
--- a/server/text_generation_server/models/flash_llama.py
+++ b/server/text_generation_server/models/flash_llama.py
@@ -29,7 +29,6 @@ tracer = trace.get_tracer(__name__)
 
 class FlashLlama(FlashCausalLM):
     def __init__(self, model_id: str, revision: Optional[str] = None, quantize=False):
-        self.past_pad = None
         if torch.cuda.is_available():
             device = torch.device("cuda")
             dtype = torch.float16
@@ -150,9 +149,7 @@ class FlashLlamaSharded(FlashLlama):
         revision: Optional[str] = None,
         quantize: Optional[str] = None,
     ):
-        self.past_pad = None
         self.process_group, rank, world_size = initialize_torch_distributed()
-        self.master = rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16
diff --git a/server/text_generation_server/models/flash_neox.py b/server/text_generation_server/models/flash_neox.py
index 168c9195..470fb73b 100644
--- a/server/text_generation_server/models/flash_neox.py
+++ b/server/text_generation_server/models/flash_neox.py
@@ -33,9 +33,7 @@ class FlashNeoXSharded(FlashNeoX):
     def __init__(
         self, model_id: str, revision: Optional[str] = None, quantize: bool = False
     ):
-        self.past_pad = None
         self.process_group, rank, world_size = initialize_torch_distributed()
-        self.master = rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16
@@ -152,4 +150,4 @@ class FlashNeoXSharded(FlashNeoX):
                 else:
                     module._buffers[param_name] = tensor
 
-        model.post_load_weights(quantize)
+        # model.post_load_weights(quantize)
diff --git a/server/text_generation_server/models/flash_santacoder.py b/server/text_generation_server/models/flash_santacoder.py
index 51a8998b..f810bb0b 100644
--- a/server/text_generation_server/models/flash_santacoder.py
+++ b/server/text_generation_server/models/flash_santacoder.py
@@ -28,7 +28,6 @@ tracer = trace.get_tracer(__name__)
 
 class FlashSantacoder(FlashCausalLM):
     def __init__(self, model_id: str, revision: Optional[str] = None, quantize=False):
-        self.past_pad = None
         if torch.cuda.is_available():
             device = torch.device("cuda")
             dtype = torch.float16
@@ -173,9 +172,7 @@ class FlashSantacoderSharded(FlashSantacoder):
     def __init__(
         self, model_id: str, revision: Optional[str] = None, quantize: bool = False
     ):
-        self.past_pad = None
         self.process_group, rank, world_size = initialize_torch_distributed()
-        self.master = rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16
diff --git a/server/text_generation_server/models/galactica.py b/server/text_generation_server/models/galactica.py
index c6dd4c33..a0111250 100644
--- a/server/text_generation_server/models/galactica.py
+++ b/server/text_generation_server/models/galactica.py
@@ -199,7 +199,6 @@ class GalacticaSharded(Galactica):
         quantize: Optional[str] = None,
     ):
         self.process_group, rank, world_size = initialize_torch_distributed()
-        self.master = rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16
diff --git a/server/text_generation_server/models/gpt_neox.py b/server/text_generation_server/models/gpt_neox.py
index 215bb2b6..3e8557b2 100644
--- a/server/text_generation_server/models/gpt_neox.py
+++ b/server/text_generation_server/models/gpt_neox.py
@@ -38,7 +38,6 @@ class GPTNeoxSharded(CausalLM):
         quantize: Optional[str] = None,
     ):
         self.process_group, rank, world_size = initialize_torch_distributed()
-        self.master = rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16
diff --git a/server/text_generation_server/models/opt.py b/server/text_generation_server/models/opt.py
index 8d856b10..c83c3351 100644
--- a/server/text_generation_server/models/opt.py
+++ b/server/text_generation_server/models/opt.py
@@ -51,7 +51,6 @@ class OPTSharded(OPT):
         self, model_id: str, revision: Optional[str] = None, quantize: bool = False
     ):
         self.process_group, rank, world_size = initialize_torch_distributed()
-        self.master = rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16
diff --git a/server/text_generation_server/models/t5.py b/server/text_generation_server/models/t5.py
index b5e7710d..6fe77ca2 100644
--- a/server/text_generation_server/models/t5.py
+++ b/server/text_generation_server/models/t5.py
@@ -38,7 +38,6 @@ class T5Sharded(Seq2SeqLM):
         quantize: Optional[str] = None,
     ):
         self.process_group, rank, world_size = initialize_torch_distributed()
-        self.master = rank == 0
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
             dtype = torch.float16