Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 04:14:52 +00:00)
fix: error when calling model.cuda() on a load_in_8bit model
parent 3af1a11401
commit ac531c8d0a
@@ -511,7 +511,7 @@ class CausalLM(Model):
             load_in_8bit=quantize == "bitsandbytes",
             trust_remote_code=trust_remote_code,
         )
-        if torch.cuda.is_available() and torch.cuda.device_count() == 1:
+        if torch.cuda.is_available() and torch.cuda.device_count() == 1 and quantize != "bitsandbytes":
             model = model.cuda()
 
         if tokenizer.pad_token_id is None:
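For context, a minimal sketch of the loading path this hunk touches, assuming a Transformers-style AutoModelForCausalLM.from_pretrained call; the function name load_causal_lm and the device_map handling are illustrative assumptions, not taken from the repository:

```python
import torch
from typing import Optional
from transformers import AutoModelForCausalLM


def load_causal_lm(
    model_id: str,
    quantize: Optional[str] = None,
    trust_remote_code: bool = False,
):
    """Sketch of the loading logic around this hunk (not the TGI source)."""
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        # With load_in_8bit=True, bitsandbytes quantizes the weights and
        # accelerate places them on the GPU during loading.
        load_in_8bit=quantize == "bitsandbytes",
        # Assumption for this sketch: 8-bit loading needs a device_map so
        # accelerate can dispatch the weights; the real code may differ.
        device_map="auto" if quantize == "bitsandbytes" else None,
        trust_remote_code=trust_remote_code,
    )
    # An 8-bit quantized model is already on the GPU and does not support
    # .cuda(), so the manual move is skipped when quantize == "bitsandbytes".
    if (
        torch.cuda.is_available()
        and torch.cuda.device_count() == 1
        and quantize != "bitsandbytes"
    ):
        model = model.cuda()
    return model
```

The added quantize check is the substance of the commit: calling .cuda() on an 8-bit bitsandbytes model raises an error, which this guard avoids.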