diff --git a/backends/gaudi/Makefile b/backends/gaudi/Makefile index 77581517..e135f16e 100644 --- a/backends/gaudi/Makefile +++ b/backends/gaudi/Makefile @@ -50,6 +50,7 @@ local-dev-install: install-dependencies # In order to run the integration tests, you need to first build the image (make -C backends/gaudi image) run-integration-tests: + pip install -U pip uv uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt DOCKER_VOLUME=${root_dir}/data \ HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \ @@ -57,6 +58,7 @@ run-integration-tests: # This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests capture-expected-outputs-for-integration-tests: + pip install -U pip uv DOCKER_VOLUME=${root_dir}/data \ HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \ uv run pytest --durations=0 -sv ${root_dir}/backends/gaudi/server/integration-tests/capture_expected_outputs.py diff --git a/backends/gaudi/server/integration-tests/test_model.py b/backends/gaudi/server/integration-tests/test_model.py index cb2bf6a9..40b27164 100644 --- a/backends/gaudi/server/integration-tests/test_model.py +++ b/backends/gaudi/server/integration-tests/test_model.py @@ -9,8 +9,8 @@ TEST_CONFIGS = { "meta-llama/Llama-3.1-8B-Instruct-shared": { "model_id": "meta-llama/Llama-3.1-8B-Instruct", "input": "What is Deep Learning?", - "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use", - "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use", + "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", + "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of", "args": [ "--sharded", "true", @@ -165,20 +165,6 @@ TEST_CONFIGS = { "4", ], }, - "facebook/opt-125m": { - "model_id": "facebook/opt-125m", - "input": "What is Deep Learning?", - "expected_greedy_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout", - "expected_batch_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout", - "args": [ - "--max-input-tokens", - "512", - "--max-total-tokens", - "1024", - "--max-batch-size", - "4", - ], - }, "EleutherAI/gpt-j-6b": { "model_id": "EleutherAI/gpt-j-6b", "input": "What is Deep Learning?", diff --git a/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py b/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py index d45cd6ce..008df32d 100644 --- a/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py +++ b/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py @@ -111,6 +111,7 @@ class MistralAttention(torch.nn.Module): ) self.num_heads = config.num_attention_heads self.hidden_size = config.hidden_size + if getattr(config, "head_dim", None) is not None: self.head_size = config.head_dim else: