text-generation-inference/backends/gaudi/server/text_generation_server/utils/peft.py

import os
from typing import Union
from loguru import logger
import torch

from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM, AutoPeftModelForSeq2SeqLM


def download_and_unload_peft(model_id, revision, trust_remote_code):
    torch_dtype = torch.float16

    logger.info("Trying to load a Peft model. It might take a while without feedback")
    try:
        model = AutoPeftModelForCausalLM.from_pretrained(
            model_id,
            revision=revision,
            torch_dtype=torch_dtype,
            trust_remote_code=trust_remote_code,
            low_cpu_mem_usage=True,
        )
    except Exception:
        model = AutoPeftModelForSeq2SeqLM.from_pretrained(
            model_id,
            revision=revision,
            torch_dtype=torch_dtype,
            trust_remote_code=trust_remote_code,
            low_cpu_mem_usage=True,
        )
    logger.info("Peft model detected.")
    logger.info("Merging the lora weights.")

    base_model_id = model.peft_config["default"].base_model_name_or_path

    model = model.merge_and_unload()

    os.makedirs(model_id, exist_ok=True)
    cache_dir = model_id
    logger.info(f"Saving the newly created merged model to {cache_dir}")
    tokenizer = AutoTokenizer.from_pretrained(
        base_model_id, trust_remote_code=trust_remote_code
    )
    model.save_pretrained(cache_dir, safe_serialization=True)
    model.config.save_pretrained(cache_dir)
    tokenizer.save_pretrained(cache_dir)


def download_peft(
    model_id: Union[str, os.PathLike], revision: str, trust_remote_code: bool
):
    torch_dtype = torch.float16
    try:
        _model = AutoPeftModelForCausalLM.from_pretrained(
            model_id,
            revision=revision,
            torch_dtype=torch_dtype,
            trust_remote_code=trust_remote_code,
            low_cpu_mem_usage=True,
        )
    except Exception:
        _model = AutoPeftModelForSeq2SeqLM.from_pretrained(
            model_id,
            revision=revision,
            torch_dtype=torch_dtype,
            trust_remote_code=trust_remote_code,
            low_cpu_mem_usage=True,
        )
    logger.info("Peft model downloaded.")
wip(gaudi): import server and dockerfile from tgi-gaudi fork 2025-02-24 08:40:44 +00:00			`import os`
			`from typing import Union`
			`from loguru import logger`
			`import torch`

			`from transformers import AutoTokenizer`
			`from peft import AutoPeftModelForCausalLM, AutoPeftModelForSeq2SeqLM`


			`def download_and_unload_peft(model_id, revision, trust_remote_code):`
			`torch_dtype = torch.float16`

			`logger.info("Trying to load a Peft model. It might take a while without feedback")`
			`try:`
			`model = AutoPeftModelForCausalLM.from_pretrained(`
			`model_id,`
			`revision=revision,`
			`torch_dtype=torch_dtype,`
			`trust_remote_code=trust_remote_code,`
			`low_cpu_mem_usage=True,`
			`)`
			`except Exception:`
			`model = AutoPeftModelForSeq2SeqLM.from_pretrained(`
			`model_id,`
			`revision=revision,`
			`torch_dtype=torch_dtype,`
			`trust_remote_code=trust_remote_code,`
			`low_cpu_mem_usage=True,`
			`)`
			`logger.info("Peft model detected.")`
			`logger.info("Merging the lora weights.")`

			`base_model_id = model.peft_config["default"].base_model_name_or_path`

			`model = model.merge_and_unload()`

			`os.makedirs(model_id, exist_ok=True)`
			`cache_dir = model_id`
			`logger.info(f"Saving the newly created merged model to {cache_dir}")`
			`tokenizer = AutoTokenizer.from_pretrained(`
			`base_model_id, trust_remote_code=trust_remote_code`
			`)`
			`model.save_pretrained(cache_dir, safe_serialization=True)`
			`model.config.save_pretrained(cache_dir)`
			`tokenizer.save_pretrained(cache_dir)`


			`def download_peft(`
			`model_id: Union[str, os.PathLike], revision: str, trust_remote_code: bool`
			`):`
			`torch_dtype = torch.float16`
			`try:`
			`_model = AutoPeftModelForCausalLM.from_pretrained(`
			`model_id,`
			`revision=revision,`
			`torch_dtype=torch_dtype,`
			`trust_remote_code=trust_remote_code,`
			`low_cpu_mem_usage=True,`
			`)`
			`except Exception:`
			`_model = AutoPeftModelForSeq2SeqLM.from_pretrained(`
			`model_id,`
			`revision=revision,`
			`torch_dtype=torch_dtype,`
			`trust_remote_code=trust_remote_code,`
			`low_cpu_mem_usage=True,`
			`)`
			`logger.info("Peft model downloaded.")`