Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-04-25 03:52:08 +00:00)
fix: fix local loading for .bin models (#1419)
This commit is contained in:
parent: fc9173aa59
commit: 118344b99d
@ -242,7 +242,7 @@ def download_weights(
|
|||||||
if not extension == ".safetensors" or not auto_convert:
|
if not extension == ".safetensors" or not auto_convert:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
else:
|
elif (Path(model_id) / "adapter_config.json").exists():
|
||||||
# Try to load as a local PEFT model
|
# Try to load as a local PEFT model
|
||||||
try:
|
try:
|
||||||
utils.download_and_unload_peft(
|
utils.download_and_unload_peft(
|
||||||
|
@ -10,8 +10,7 @@ from peft import AutoPeftModelForCausalLM, AutoPeftModelForSeq2SeqLM
|
|||||||
def download_and_unload_peft(model_id, revision, trust_remote_code):
|
def download_and_unload_peft(model_id, revision, trust_remote_code):
|
||||||
torch_dtype = torch.float16
|
torch_dtype = torch.float16
|
||||||
|
|
||||||
logger.info("Peft model detected.")
|
logger.info("Trying to load a Peft model. It might take a while without feedback")
|
||||||
logger.info("Loading the model it might take a while without feedback")
|
|
||||||
try:
|
try:
|
||||||
model = AutoPeftModelForCausalLM.from_pretrained(
|
model = AutoPeftModelForCausalLM.from_pretrained(
|
||||||
model_id,
|
model_id,
|
||||||
@ -28,7 +27,7 @@ def download_and_unload_peft(model_id, revision, trust_remote_code):
|
|||||||
trust_remote_code=trust_remote_code,
|
trust_remote_code=trust_remote_code,
|
||||||
low_cpu_mem_usage=True,
|
low_cpu_mem_usage=True,
|
||||||
)
|
)
|
||||||
logger.info(f"Loaded.")
|
logger.info("Peft model detected.")
|
||||||
logger.info(f"Merging the lora weights.")
|
logger.info(f"Merging the lora weights.")
|
||||||
|
|
||||||
base_model_id = model.peft_config["default"].base_model_name_or_path
|
base_model_id = model.peft_config["default"].base_model_name_or_path
|
||||||
|
Loading…
Reference in New Issue
Block a user