From 8d7f18f41e9bd32090f73ad52532eb257563c95f Mon Sep 17 00:00:00 2001
From: fxmarty <9808326+fxmarty@users.noreply.github.com>
Date: Fri, 17 May 2024 08:53:08 +0000
Subject: [PATCH] diff nicer

---
 server/text_generation_server/models/flash_cohere.py     | 1 -
 server/text_generation_server/models/flash_dbrx.py       | 1 -
 server/text_generation_server/models/flash_gemma.py      | 1 -
 server/text_generation_server/models/flash_gpt2.py       | 1 -
 server/text_generation_server/models/flash_llama.py      | 1 -
 server/text_generation_server/models/flash_neox.py       | 1 -
 server/text_generation_server/models/flash_phi.py        | 1 -
 server/text_generation_server/models/flash_qwen2.py      | 1 -
 server/text_generation_server/models/flash_rw.py         | 1 -
 server/text_generation_server/models/flash_santacoder.py | 1 -
 server/text_generation_server/models/flash_starcoder2.py | 1 -
 server/text_generation_server/models/gpt_neox.py         | 1 -
 server/text_generation_server/models/idefics.py          | 1 -
 server/text_generation_server/models/mpt.py              | 1 -
 server/text_generation_server/models/opt.py              | 1 -
 server/text_generation_server/models/phi.py              | 1 -
 server/text_generation_server/models/rw.py               | 1 -
 server/text_generation_server/models/santacoder.py       | 1 -
 server/text_generation_server/models/seq2seq_lm.py       | 1 -
 server/text_generation_server/models/t5.py               | 1 -
 20 files changed, 20 deletions(-)

diff --git a/server/text_generation_server/models/flash_cohere.py b/server/text_generation_server/models/flash_cohere.py
index 8edaaa35..b907ee08 100644
--- a/server/text_generation_server/models/flash_cohere.py
+++ b/server/text_generation_server/models/flash_cohere.py
@@ -28,7 +28,6 @@ class FlashCohere(FlashCausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/flash_dbrx.py b/server/text_generation_server/models/flash_dbrx.py
index 6a9b9d7f..d5eb1a6e 100644
--- a/server/text_generation_server/models/flash_dbrx.py
+++ b/server/text_generation_server/models/flash_dbrx.py
@@ -30,7 +30,6 @@ class FlashDbrx(FlashCausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/flash_gemma.py b/server/text_generation_server/models/flash_gemma.py
index 70f1b65c..53bfd064 100644
--- a/server/text_generation_server/models/flash_gemma.py
+++ b/server/text_generation_server/models/flash_gemma.py
@@ -28,7 +28,6 @@ class FlashGemma(FlashCausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/flash_gpt2.py b/server/text_generation_server/models/flash_gpt2.py
index 65bb9920..0067a806 100644
--- a/server/text_generation_server/models/flash_gpt2.py
+++ b/server/text_generation_server/models/flash_gpt2.py
@@ -30,7 +30,6 @@ class FlashGPT2(FlashCausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/flash_llama.py b/server/text_generation_server/models/flash_llama.py
index 7395a41f..fa22322a 100644
--- a/server/text_generation_server/models/flash_llama.py
+++ b/server/text_generation_server/models/flash_llama.py
@@ -32,7 +32,6 @@ class FlashLlama(FlashCausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/flash_neox.py b/server/text_generation_server/models/flash_neox.py
index 9242a124..adefaeb2 100644
--- a/server/text_generation_server/models/flash_neox.py
+++ b/server/text_generation_server/models/flash_neox.py
@@ -29,7 +29,6 @@ class FlashNeoXSharded(FlashCausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/flash_phi.py b/server/text_generation_server/models/flash_phi.py
index 92f0a2b5..32b573a9 100644
--- a/server/text_generation_server/models/flash_phi.py
+++ b/server/text_generation_server/models/flash_phi.py
@@ -29,7 +29,6 @@ class FlashPhi(FlashCausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/flash_qwen2.py b/server/text_generation_server/models/flash_qwen2.py
index 5ee8ffe6..59064b30 100644
--- a/server/text_generation_server/models/flash_qwen2.py
+++ b/server/text_generation_server/models/flash_qwen2.py
@@ -34,7 +34,6 @@ class FlashQwen2(BaseFlashMistral):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/flash_rw.py b/server/text_generation_server/models/flash_rw.py
index ccc90179..e6350611 100644
--- a/server/text_generation_server/models/flash_rw.py
+++ b/server/text_generation_server/models/flash_rw.py
@@ -30,7 +30,6 @@ class FlashRWSharded(FlashCausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/flash_santacoder.py b/server/text_generation_server/models/flash_santacoder.py
index e1add297..2ad36b93 100644
--- a/server/text_generation_server/models/flash_santacoder.py
+++ b/server/text_generation_server/models/flash_santacoder.py
@@ -33,7 +33,6 @@ class FlashSantacoderSharded(FlashCausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/flash_starcoder2.py b/server/text_generation_server/models/flash_starcoder2.py
index 80323fb6..dc5d49be 100644
--- a/server/text_generation_server/models/flash_starcoder2.py
+++ b/server/text_generation_server/models/flash_starcoder2.py
@@ -33,7 +33,6 @@ class FlashStarcoder2(BaseFlashMistral):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/gpt_neox.py b/server/text_generation_server/models/gpt_neox.py
index 92fa5ce4..c0e1adf2 100644
--- a/server/text_generation_server/models/gpt_neox.py
+++ b/server/text_generation_server/models/gpt_neox.py
@@ -28,7 +28,6 @@ class GPTNeoxSharded(CausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/idefics.py b/server/text_generation_server/models/idefics.py
index 816c5e75..c1fe03e4 100644
--- a/server/text_generation_server/models/idefics.py
+++ b/server/text_generation_server/models/idefics.py
@@ -35,7 +35,6 @@ class IDEFICSSharded(IdeficsCausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/mpt.py b/server/text_generation_server/models/mpt.py
index 6f6e837f..8d8b4909 100644
--- a/server/text_generation_server/models/mpt.py
+++ b/server/text_generation_server/models/mpt.py
@@ -47,7 +47,6 @@ class MPTSharded(CausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/opt.py b/server/text_generation_server/models/opt.py
index 48584734..5b84f4ff 100644
--- a/server/text_generation_server/models/opt.py
+++ b/server/text_generation_server/models/opt.py
@@ -26,7 +26,6 @@ class OPTSharded(CausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")
diff --git a/server/text_generation_server/models/phi.py b/server/text_generation_server/models/phi.py
index d4dff836..d68866c1 100644
--- a/server/text_generation_server/models/phi.py
+++ b/server/text_generation_server/models/phi.py
@@ -26,7 +26,6 @@ class Phi(CausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, _rank, _world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device("cuda")
diff --git a/server/text_generation_server/models/rw.py b/server/text_generation_server/models/rw.py
index c347c47d..d4764ded 100644
--- a/server/text_generation_server/models/rw.py
+++ b/server/text_generation_server/models/rw.py
@@ -16,7 +16,6 @@ class RW(CausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         if speculator:
             raise RuntimeError("Medusa decoding is not enabled for AutoModel")
 
diff --git a/server/text_generation_server/models/santacoder.py b/server/text_generation_server/models/santacoder.py
index 188faf21..323e4324 100644
--- a/server/text_generation_server/models/santacoder.py
+++ b/server/text_generation_server/models/santacoder.py
@@ -23,7 +23,6 @@ class SantaCoder(CausalLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         if torch.cuda.is_available():
             device = torch.device("cuda")
             dtype = torch.float16 if dtype is None else dtype
diff --git a/server/text_generation_server/models/seq2seq_lm.py b/server/text_generation_server/models/seq2seq_lm.py
index c5473107..710b0f7e 100644
--- a/server/text_generation_server/models/seq2seq_lm.py
+++ b/server/text_generation_server/models/seq2seq_lm.py
@@ -537,7 +537,6 @@ class Seq2SeqLM(Model):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         if speculator:
             raise RuntimeError("Speculator decoding is not enabled for AutoModel")
 
diff --git a/server/text_generation_server/models/t5.py b/server/text_generation_server/models/t5.py
index 674e9318..8e0735e5 100644
--- a/server/text_generation_server/models/t5.py
+++ b/server/text_generation_server/models/t5.py
@@ -29,7 +29,6 @@ class T5Sharded(Seq2SeqLM):
         dtype: Optional[torch.dtype] = None,
         trust_remote_code: bool = False,
     ):
-
         self.process_group, rank, world_size = initialize_torch_distributed()
         if torch.cuda.is_available():
             device = torch.device(f"cuda:{rank}")