Mirror of https://github.com/huggingface/text-generation-inference.git
Directly load GPTBigCode to specified device
This PR loads GPTBigCode directly onto the specified device, avoiding moving the model between devices.
parent c58a0c185b
commit 3e5165c3ed
@@ -51,14 +51,14 @@ class SantaCoder(CausalLM):
                 "pad_token": EOD,
             }
         )

-        model = AutoModelForCausalLM.from_pretrained(
-            model_id,
-            revision=revision,
-            torch_dtype=dtype,
-            load_in_8bit=quantize == "bitsandbytes",
-            trust_remote_code=trust_remote_code,
-        ).to(device)
+        with device:
+            model = AutoModelForCausalLM.from_pretrained(
+                model_id,
+                revision=revision,
+                torch_dtype=dtype,
+                load_in_8bit=quantize == "bitsandbytes",
+                trust_remote_code=trust_remote_code,
+            )

         super(CausalLM, self).__init__(
             model=model,
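To illustrate the pattern outside the repository, below is a minimal, hypothetical sketch (not the TGI code itself). It assumes PyTorch >= 2.0, where a torch.device can be used as a context manager, and uses a placeholder checkpoint name; the revision and quantization arguments from the diff are omitted for brevity.

# Minimal sketch of the change, assuming PyTorch >= 2.0 and an example
# GPTBigCode checkpoint; adjust model_id and dtype for your setup.
import torch
from transformers import AutoModelForCausalLM

model_id = "bigcode/santacoder"  # example checkpoint, not fixed by the PR
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float16 if device.type == "cuda" else torch.float32

# Before: weights are materialized on CPU first, then copied to the GPU.
# model = AutoModelForCausalLM.from_pretrained(
#     model_id, torch_dtype=dtype, trust_remote_code=True
# ).to(device)

# After: inside the device context, newly created tensors (including the
# model's parameters) default to `device`, so the weights are placed on the
# target device directly instead of being moved there afterwards.
with device:
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=dtype, trust_remote_code=True
    )

print(next(model.parameters()).device)  # e.g. cuda:0

Entering the device context changes the default device for tensor creation, which is what lets from_pretrained build the model on the target device and skip the extra CPU allocation and host-to-device copy that .to(device) incurred.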