Removing dead variables.

2025-09-09 19:34:53 +00:00 · 2023-05-15 12:33:21 +02:00 · 2023-05-15 12:33:21 +02:00 · 89ff4e901a
commit 89ff4e901a
parent 91e674bb85
8 changed files with 1 addition and 14 deletions
--- a/server/text_generation_server/models/bloom.py
+++ b/server/text_generation_server/models/bloom.py
@@ -72,7 +72,6 @@ class BLOOMSharded(BLOOM):
        quantize: Optional[str] = None,
    ):
        self.process_group, rank, world_size = initialize_torch_distributed()
        self.master = rank == 0
        if torch.cuda.is_available():
            device = torch.device(f"cuda:{rank}")
            dtype = torch.float16
--- a/server/text_generation_server/models/flash_llama.py
+++ b/server/text_generation_server/models/flash_llama.py
@@ -29,7 +29,6 @@ tracer = trace.get_tracer(__name__)
 class FlashLlama(FlashCausalLM):
    def __init__(self, model_id: str, revision: Optional[str] = None, quantize=False):
        self.past_pad = None
        if torch.cuda.is_available():
            device = torch.device("cuda")
            dtype = torch.float16
@@ -150,9 +149,7 @@ class FlashLlamaSharded(FlashLlama):
        revision: Optional[str] = None,
        quantize: Optional[str] = None,
    ):
        self.past_pad = None
        self.process_group, rank, world_size = initialize_torch_distributed()
        self.master = rank == 0
        if torch.cuda.is_available():
            device = torch.device(f"cuda:{rank}")
            dtype = torch.float16
--- a/server/text_generation_server/models/flash_neox.py
+++ b/server/text_generation_server/models/flash_neox.py
@@ -33,9 +33,7 @@ class FlashNeoXSharded(FlashNeoX):
    def __init__(
        self, model_id: str, revision: Optional[str] = None, quantize: bool = False
    ):
        self.past_pad = None
        self.process_group, rank, world_size = initialize_torch_distributed()
        self.master = rank == 0
        if torch.cuda.is_available():
            device = torch.device(f"cuda:{rank}")
            dtype = torch.float16
@@ -152,4 +150,4 @@ class FlashNeoXSharded(FlashNeoX):
                    else:
                        module._buffers[param_name] = tensor
-        model.post_load_weights(quantize)
+        # model.post_load_weights(quantize)
--- a/server/text_generation_server/models/flash_santacoder.py
+++ b/server/text_generation_server/models/flash_santacoder.py
@@ -28,7 +28,6 @@ tracer = trace.get_tracer(__name__)
 class FlashSantacoder(FlashCausalLM):
    def __init__(self, model_id: str, revision: Optional[str] = None, quantize=False):
        self.past_pad = None
        if torch.cuda.is_available():
            device = torch.device("cuda")
            dtype = torch.float16
@@ -173,9 +172,7 @@ class FlashSantacoderSharded(FlashSantacoder):
    def __init__(
        self, model_id: str, revision: Optional[str] = None, quantize: bool = False
    ):
        self.past_pad = None
        self.process_group, rank, world_size = initialize_torch_distributed()
        self.master = rank == 0
        if torch.cuda.is_available():
            device = torch.device(f"cuda:{rank}")
            dtype = torch.float16
--- a/server/text_generation_server/models/galactica.py
+++ b/server/text_generation_server/models/galactica.py
@@ -199,7 +199,6 @@ class GalacticaSharded(Galactica):
        quantize: Optional[str] = None,
    ):
        self.process_group, rank, world_size = initialize_torch_distributed()
        self.master = rank == 0
        if torch.cuda.is_available():
            device = torch.device(f"cuda:{rank}")
            dtype = torch.float16
--- a/server/text_generation_server/models/gpt_neox.py
+++ b/server/text_generation_server/models/gpt_neox.py
@@ -38,7 +38,6 @@ class GPTNeoxSharded(CausalLM):
        quantize: Optional[str] = None,
    ):
        self.process_group, rank, world_size = initialize_torch_distributed()
        self.master = rank == 0
        if torch.cuda.is_available():
            device = torch.device(f"cuda:{rank}")
            dtype = torch.float16
--- a/server/text_generation_server/models/opt.py
+++ b/server/text_generation_server/models/opt.py
@@ -51,7 +51,6 @@ class OPTSharded(OPT):
        self, model_id: str, revision: Optional[str] = None, quantize: bool = False
    ):
        self.process_group, rank, world_size = initialize_torch_distributed()
        self.master = rank == 0
        if torch.cuda.is_available():
            device = torch.device(f"cuda:{rank}")
            dtype = torch.float16
--- a/server/text_generation_server/models/t5.py
+++ b/server/text_generation_server/models/t5.py
@@ -38,7 +38,6 @@ class T5Sharded(Seq2SeqLM):
        quantize: Optional[str] = None,
    ):
        self.process_group, rank, world_size = initialize_torch_distributed()
        self.master = rank == 0
        if torch.cuda.is_available():
            device = torch.device(f"cuda:{rank}")
            dtype = torch.float16