Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)
Fixing the download strategy for ibm-fms

commit e5416274df (parent b5f1c9de06)

In short: the hub path now treats the medusa_lm_head.safetensors fetch as optional and still resolves the parent model from config.json's base_model_name_or_path, and the local path probes for a PEFT adapter_config.json before treating a directory as a Medusa repository.
@@ -154,13 +154,21 @@ def download_weights(
         try:
             import json
 
-            medusa_head = hf_hub_download(
-                model_id, revision=revision, filename="medusa_lm_head.safetensors"
-            )
-            medusa_config = hf_hub_download(
+            # TODO remove this, there's currently logic to avoid detecting this file
+            # as being enough to say the entire repo is on disk, since we also need the parent model
+            # We're keeping this potential download to prevent breaking backward compatibility,
+            # but it shouldn't be necessary in the flow here.
+            try:
+                medusa_head = hf_hub_download(
+                    model_id, revision=revision, filename="medusa_lm_head.safetensors"
+                )
+            except Exception:
+                pass
+
+            config = hf_hub_download(
                 model_id, revision=revision, filename="config.json"
             )
-            with open(medusa_config, "r") as f:
+            with open(config, "r") as f:
                 config = json.load(f)
 
             model_id = config["base_model_name_or_path"]
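Read in isolation, the right-hand side of this hunk is a two-step hub resolution: try to fetch the Medusa head but tolerate its absence, then fetch config.json and follow its base_model_name_or_path pointer. A minimal sketch of that flow, assuming only that huggingface_hub is installed; resolve_base_model is a hypothetical wrapper, not a function from the commit:

    import json
    from typing import Optional

    from huggingface_hub import hf_hub_download


    def resolve_base_model(model_id: str, revision: Optional[str] = None) -> str:
        # The Medusa head is optional: its absence must not abort the
        # download, because the parent model still has to be fetched.
        try:
            hf_hub_download(
                model_id, revision=revision, filename="medusa_lm_head.safetensors"
            )
        except Exception:
            pass

        # config.json carries the pointer to the parent model.
        config_path = hf_hub_download(
            model_id, revision=revision, filename="config.json"
        )
        with open(config_path, "r") as f:
            return json.load(f)["base_model_name_or_path"]

The nested try appears to be the ibm-fms fix itself: a repository without the head file previously bailed out of this block before config.json was read, so the parent model it points at was never downloaded; tolerating the failure keeps the base_model_name_or_path resolution alive.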
@@ -195,14 +203,23 @@ def download_weights(
             if not extension == ".safetensors" or not auto_convert:
                 raise e
 
-    elif (Path(model_id) / "medusa_lm_head.safetensors").exists():
+    elif (Path(model_id) / "adapter_config.json").exists():
+        # Try to load as a local PEFT model
+        try:
+            utils.download_and_unload_peft(
+                model_id, revision, trust_remote_code=trust_remote_code
+            )
+            utils.weight_files(model_id, revision, extension)
+            return
+        except (utils.LocalEntryNotFoundError, utils.EntryNotFoundError):
+            pass
+    elif (Path(model_id) / "config.json").exists():
         # Try to load as a local Medusa model
         try:
             import json
 
-            medusa_head = Path(model_id) / "medusa_lm_head.safetensors"
-            medusa_config = Path(model_id) / "config.json"
-            with open(medusa_config, "r") as f:
+            config = Path(model_id) / "config.json"
+            with open(config, "r") as f:
                 config = json.load(f)
 
             model_id = config["base_model_name_or_path"]
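This hunk reorders local-directory detection: adapter_config.json (a PEFT adapter) is now checked before config.json, and the Medusa branch no longer requires medusa_lm_head.safetensors to be present. A small sketch of the resulting precedence; detect_local_layout is a hypothetical illustration, not code from the repository:

    import json
    from pathlib import Path


    def detect_local_layout(model_id: str) -> str:
        root = Path(model_id)
        # PEFT adapters are probed first; presumably an adapter directory
        # can also ship a config.json, which would otherwise match the
        # Medusa branch below.
        if (root / "adapter_config.json").exists():
            return "peft"  # handled by utils.download_and_unload_peft
        if (root / "config.json").exists():
            config = json.loads((root / "config.json").read_text())
            # Only Medusa-style configs point at a parent model.
            if "base_model_name_or_path" in config:
                return "medusa"
        return "plain"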
@@ -220,17 +237,6 @@ def download_weights(
         except (utils.LocalEntryNotFoundError, utils.EntryNotFoundError):
             pass
 
-    elif (Path(model_id) / "adapter_config.json").exists():
-        # Try to load as a local PEFT model
-        try:
-            utils.download_and_unload_peft(
-                model_id, revision, trust_remote_code=trust_remote_code
-            )
-            utils.weight_files(model_id, revision, extension)
-            return
-        except (utils.LocalEntryNotFoundError, utils.EntryNotFoundError):
-            pass
-
     # Try to see if there are local pytorch weights
     try:
         # Get weights for a local model, a hub cached model and inside the WEIGHTS_CACHE_OVERRIDE