Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-12 12:54:52 +00:00)
fix: revert non snapshot changes

parent 68854d11ef
commit b5f61e92b5
@@ -26,6 +26,3 @@ install-selective-scan: install-causal-conv1d build-selective-scan
 	cd mamba && pip install .
 
 build-all: build-causal-conv1d build-selective-scan
-
-install-ssm: install-causal-conv1d install-selective-scan
-	@echo "Selective scan model installed"
@@ -609,16 +609,15 @@ class CausalLM(Model):
             truncation_side="left",
             trust_remote_code=trust_remote_code,
         )
-        device_map = (
-            "auto"
-            if torch.cuda.is_available() and torch.cuda.device_count() > 1
-            else None
-        )
         model = AutoModelForCausalLM.from_pretrained(
             model_id,
             revision=revision,
             torch_dtype=dtype,
-            device_map=device_map,
+            device_map=(
+                "auto"
+                if torch.cuda.is_available() and torch.cuda.device_count() > 1
+                else None
+            ),
             load_in_8bit=quantize == "bitsandbytes",
             trust_remote_code=trust_remote_code,
         )
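
For reference, the expression inlined above enables multi-GPU sharding only when more than one CUDA device is visible. A minimal standalone sketch of the same pattern, assuming transformers and accelerate are installed; the "gpt2" checkpoint and float16 dtype are placeholders, not the values TGI passes:

    import torch
    from transformers import AutoModelForCausalLM

    # Use accelerate's "auto" placement to shard across GPUs only when more
    # than one CUDA device is visible; otherwise device_map=None loads the
    # model on CPU and leaves placement to the caller.
    model = AutoModelForCausalLM.from_pretrained(
        "gpt2",  # placeholder checkpoint
        torch_dtype=torch.float16,  # placeholder dtype
        device_map=(
            "auto"
            if torch.cuda.is_available() and torch.cuda.device_count() > 1
            else None
        ),
    )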
@@ -629,11 +628,6 @@ class CausalLM(Model):
         ):
             model = model.cuda()
 
-        # if device_map is "auto", it's unclear which device the model is on
-        # therefore, we need to get the device the model is on after loading
-        if device_map is not None:
-            device = next(model.parameters()).device
-
         if tokenizer.pad_token_id is None:
             if model.config.pad_token_id is not None:
                 tokenizer.pad_token_id = model.config.pad_token_id
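
The lines removed above implement a device-recovery idiom: with device_map="auto", weight placement is only decided at load time, so the model's device is read back from a loaded parameter afterwards. A minimal sketch of that idiom under the same assumptions (transformers plus accelerate installed; "gpt2" is a placeholder checkpoint):

    import torch
    from transformers import AutoModelForCausalLM

    # With device_map="auto", accelerate decides placement while loading,
    # so query a parameter afterwards to learn where inputs must be sent.
    model = AutoModelForCausalLM.from_pretrained("gpt2", device_map="auto")
    device = next(model.parameters()).device  # e.g. cuda:0, or cpu
    inputs = torch.tensor([[0]]).to(device)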

@@ -639,28 +639,21 @@ class Seq2SeqLM(Model):
             device = torch.device("cpu")
             dtype = torch.float32 if dtype is None else dtype
 
-        device_map = (
-            "auto"
-            if torch.cuda.is_available() and torch.cuda.device_count() > 1
-            else None
-        )
-
         model = AutoModelForSeq2SeqLM.from_pretrained(
             model_id,
             revision=revision,
             torch_dtype=dtype,
-            device_map=device_map,
+            device_map=(
+                "auto"
+                if torch.cuda.is_available() and torch.cuda.device_count() > 1
+                else None
+            ),
             load_in_8bit=quantize == "bitsandbytes",
             trust_remote_code=trust_remote_code,
         )
         if torch.cuda.is_available() and torch.cuda.device_count() == 1:
             model = model.cuda()
 
-        # if device_map is "auto", it's unclear which device the model is on
-        # therefore, we need to get the device the model is on after loading
-        if device_map is not None:
-            device = next(model.parameters()).device
-
         tokenizer = AutoTokenizer.from_pretrained(
             model_id,
             revision=revision,