fix(neuron): the neuron config is no longer stored in the model config

David Corvoysier 2025-05-23 09:48:05 +00:00
parent c4dd2a8197
commit 39895019c8
3 changed files with 37 additions and 8 deletions
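
The same pattern recurs in all three files below: instead of reading a "neuron" attribute off the Transformers AutoConfig, the code now loads the dedicated NeuronConfig and treats a load failure as "this checkpoint has no Neuron export yet". A minimal sketch of that detection logic, using a hypothetical helper name (has_neuron_export) that is not part of the commit:

from typing import Optional

from optimum.neuron.configuration_utils import NeuronConfig


def has_neuron_export(model_id: str, revision: Optional[str] = None) -> bool:
    # Hypothetical helper, not part of this commit: it mirrors the try/except
    # blocks in the hunks below, where any failure to load a NeuronConfig
    # simply means the checkpoint has not been exported for Neuron yet.
    try:
        NeuronConfig.from_pretrained(model_id, revision=revision)
    except Exception:
        return False
    return True

Callers then branch exactly as the diffs do: no NeuronConfig sends the model down the export path, otherwise the pre-exported artifacts are used directly.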


@@ -7,7 +7,8 @@ from typing import List, Optional, Tuple
 import torch
 from loguru import logger
-from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase
+from transformers import AutoTokenizer, PreTrainedTokenizerBase
+from optimum.neuron.configuration_utils import NeuronConfig
 from transformers.generation import GenerationConfig
 from optimum.neuron import NeuronModelForCausalLM
@@ -663,8 +664,16 @@ class NeuronGenerator(Generator):
         Returns:
             A NeuronGenerator.
         """
-        config = AutoConfig.from_pretrained(model_id)
-        neuron_config = getattr(config, "neuron", None)
+        try:
+            neuron_config = NeuronConfig.from_pretrained(model_id, revision=revision)
+        except Exception as e:
+            logger.debug(
+                "NeuronConfig.from_pretrained failed for model %s, revision %s: %s",
+                model_id,
+                revision,
+                e,
+            )
+            neuron_config = None
         start = time.time()
         if neuron_config is None:
             export_kwargs = get_export_kwargs_from_env()


@@ -10,6 +10,7 @@ from transformers import AutoConfig
 from optimum.neuron import NeuronModelForCausalLM
 from optimum.neuron.cache import get_hub_cached_entries
+from optimum.neuron.configuration_utils import NeuronConfig
 def get_export_kwargs_from_env():
@@ -87,8 +88,16 @@ def fetch_model(
         revision = None
     # Download the model from the Hub (HUGGING_FACE_HUB_TOKEN must be set for a private or gated model)
     # Note that the model may already be present in the cache.
-    config = AutoConfig.from_pretrained(model_id, revision=revision)
-    neuron_config = getattr(config, "neuron", None)
+    try:
+        neuron_config = NeuronConfig.from_pretrained(model_id, revision=revision)
+    except Exception as e:
+        logger.debug(
+            "NeuronConfig.from_pretrained failed for model %s, revision %s: %s",
+            model_id,
+            revision,
+            e,
+        )
+        neuron_config = None
     if neuron_config is not None:
         if os.path.isdir(model_id):
             return model_id
@@ -100,6 +109,7 @@ def fetch_model(
         return snapshot_download(model_id, revision=revision, ignore_patterns="*.bin")
     # Model needs to be exported: look for compatible cached entries on the hub
     export_kwargs = get_export_kwargs_from_env()
+    config = AutoConfig.from_pretrained(model_id, revision=revision)
     export_config = NeuronModelForCausalLM.get_export_config(
         model_id, config, revision=revision, **export_kwargs
     )


@@ -7,10 +7,10 @@ import sys
 from typing import Any, Dict, List, Optional
 from huggingface_hub import constants
-from transformers import AutoConfig
 from optimum.neuron.modeling_decoder import get_available_cores
 from optimum.neuron.cache import get_hub_cached_entries
+from optimum.neuron.configuration_utils import NeuronConfig
 from optimum.neuron.utils.version_utils import get_neuronxcc_version
@@ -238,8 +238,18 @@ def main():
     logger.info("Cache dir %s, model %s", cache_dir, args.model_id)
-    config = AutoConfig.from_pretrained(args.model_id, revision=args.revision)
-    neuron_config = getattr(config, "neuron", None)
+    try:
+        neuron_config = NeuronConfig.from_pretrained(
+            args.model_id, revision=args.revision
+        )
+    except Exception as e:
+        logger.debug(
+            "NeuronConfig.from_pretrained failed for model %s, revision %s: %s",
+            args.model_id,
+            args.revision,
+            e,
+        )
+        neuron_config = None
     if neuron_config is not None:
         compatible = check_env_and_neuron_config_compatibility(
             neuron_config, check_compiler_version=False