mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-09 11:24:53 +00:00
fix(neuron): neuron config is not stored in config anymore
This commit is contained in:
parent
c4dd2a8197
commit
39895019c8
@ -7,7 +7,8 @@ from typing import List, Optional, Tuple
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from transformers import AutoConfig, AutoTokenizer, PreTrainedTokenizerBase
|
from transformers import AutoTokenizer, PreTrainedTokenizerBase
|
||||||
|
from optimum.neuron.configuration_utils import NeuronConfig
|
||||||
from transformers.generation import GenerationConfig
|
from transformers.generation import GenerationConfig
|
||||||
|
|
||||||
from optimum.neuron import NeuronModelForCausalLM
|
from optimum.neuron import NeuronModelForCausalLM
|
||||||
@ -663,8 +664,16 @@ class NeuronGenerator(Generator):
|
|||||||
Returns:
|
Returns:
|
||||||
A NeuronGenerator.
|
A NeuronGenerator.
|
||||||
"""
|
"""
|
||||||
config = AutoConfig.from_pretrained(model_id)
|
try:
|
||||||
neuron_config = getattr(config, "neuron", None)
|
neuron_config = NeuronConfig.from_pretrained(model_id, revision=revision)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(
|
||||||
|
"NeuronConfig.from_pretrained failed for model %s, revision %s: %s",
|
||||||
|
model_id,
|
||||||
|
revision,
|
||||||
|
e,
|
||||||
|
)
|
||||||
|
neuron_config = None
|
||||||
start = time.time()
|
start = time.time()
|
||||||
if neuron_config is None:
|
if neuron_config is None:
|
||||||
export_kwargs = get_export_kwargs_from_env()
|
export_kwargs = get_export_kwargs_from_env()
|
||||||
|
@ -10,6 +10,7 @@ from transformers import AutoConfig
|
|||||||
|
|
||||||
from optimum.neuron import NeuronModelForCausalLM
|
from optimum.neuron import NeuronModelForCausalLM
|
||||||
from optimum.neuron.cache import get_hub_cached_entries
|
from optimum.neuron.cache import get_hub_cached_entries
|
||||||
|
from optimum.neuron.configuration_utils import NeuronConfig
|
||||||
|
|
||||||
|
|
||||||
def get_export_kwargs_from_env():
|
def get_export_kwargs_from_env():
|
||||||
@ -87,8 +88,16 @@ def fetch_model(
|
|||||||
revision = None
|
revision = None
|
||||||
# Download the model from the Hub (HUGGING_FACE_HUB_TOKEN must be set for a private or gated model)
|
# Download the model from the Hub (HUGGING_FACE_HUB_TOKEN must be set for a private or gated model)
|
||||||
# Note that the model may already be present in the cache.
|
# Note that the model may already be present in the cache.
|
||||||
config = AutoConfig.from_pretrained(model_id, revision=revision)
|
try:
|
||||||
neuron_config = getattr(config, "neuron", None)
|
neuron_config = NeuronConfig.from_pretrained(model_id, revision=revision)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(
|
||||||
|
"NeuronConfig.from_pretrained failed for model %s, revision %s: %s",
|
||||||
|
model_id,
|
||||||
|
revision,
|
||||||
|
e,
|
||||||
|
)
|
||||||
|
neuron_config = None
|
||||||
if neuron_config is not None:
|
if neuron_config is not None:
|
||||||
if os.path.isdir(model_id):
|
if os.path.isdir(model_id):
|
||||||
return model_id
|
return model_id
|
||||||
@ -100,6 +109,7 @@ def fetch_model(
|
|||||||
return snapshot_download(model_id, revision=revision, ignore_patterns="*.bin")
|
return snapshot_download(model_id, revision=revision, ignore_patterns="*.bin")
|
||||||
# Model needs to be exported: look for compatible cached entries on the hub
|
# Model needs to be exported: look for compatible cached entries on the hub
|
||||||
export_kwargs = get_export_kwargs_from_env()
|
export_kwargs = get_export_kwargs_from_env()
|
||||||
|
config = AutoConfig.from_pretrained(model_id, revision=revision)
|
||||||
export_config = NeuronModelForCausalLM.get_export_config(
|
export_config = NeuronModelForCausalLM.get_export_config(
|
||||||
model_id, config, revision=revision, **export_kwargs
|
model_id, config, revision=revision, **export_kwargs
|
||||||
)
|
)
|
||||||
|
@ -7,10 +7,10 @@ import sys
|
|||||||
from typing import Any, Dict, List, Optional
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
from huggingface_hub import constants
|
from huggingface_hub import constants
|
||||||
from transformers import AutoConfig
|
|
||||||
|
|
||||||
from optimum.neuron.modeling_decoder import get_available_cores
|
from optimum.neuron.modeling_decoder import get_available_cores
|
||||||
from optimum.neuron.cache import get_hub_cached_entries
|
from optimum.neuron.cache import get_hub_cached_entries
|
||||||
|
from optimum.neuron.configuration_utils import NeuronConfig
|
||||||
from optimum.neuron.utils.version_utils import get_neuronxcc_version
|
from optimum.neuron.utils.version_utils import get_neuronxcc_version
|
||||||
|
|
||||||
|
|
||||||
@ -238,8 +238,18 @@ def main():
|
|||||||
|
|
||||||
logger.info("Cache dir %s, model %s", cache_dir, args.model_id)
|
logger.info("Cache dir %s, model %s", cache_dir, args.model_id)
|
||||||
|
|
||||||
config = AutoConfig.from_pretrained(args.model_id, revision=args.revision)
|
try:
|
||||||
neuron_config = getattr(config, "neuron", None)
|
neuron_config = NeuronConfig.from_pretrained(
|
||||||
|
args.model_id, revision=args.revision
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(
|
||||||
|
"NeuronConfig.from_pretrained failed for model %s, revision %s: %s",
|
||||||
|
args.model_id,
|
||||||
|
args.revision,
|
||||||
|
e,
|
||||||
|
)
|
||||||
|
neuron_config = None
|
||||||
if neuron_config is not None:
|
if neuron_config is not None:
|
||||||
compatible = check_env_and_neuron_config_compatibility(
|
compatible = check_env_and_neuron_config_compatibility(
|
||||||
neuron_config, check_compiler_version=False
|
neuron_config, check_compiler_version=False
|
||||||
|
Loading…
Reference in New Issue
Block a user