[Gaudi] Fix the integration-test issues (#3265)

Signed-off-by: yuanwu <yuan.wu@intel.com>
2025-06-19 15:52:08 +00:00 · 2025-06-13 20:47:06 +08:00 · 2025-06-13 20:47:06 +08:00 · 3752143b39
commit 3752143b39
parent ded4cb52ac
3 changed files with 5 additions and 16 deletions
--- a/backends/gaudi/Makefile
+++ b/backends/gaudi/Makefile
@ -50,6 +50,7 @@ local-dev-install: install-dependencies
 # In order to run the integration tests, you need to first build the image (make -C backends/gaudi image)
 run-integration-tests:
 	pip install -U pip uv
 	uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt
 	DOCKER_VOLUME=${root_dir}/data \
 	HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
@ -57,6 +58,7 @@ run-integration-tests:
 # This captures the expected outputs for the integration tests, offering an easy way to add more models to them
 capture-expected-outputs-for-integration-tests:
 	pip install -U pip uv
 	DOCKER_VOLUME=${root_dir}/data \
 	HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
 	uv run pytest --durations=0 -sv ${root_dir}/backends/gaudi/server/integration-tests/capture_expected_outputs.py
--- a/backends/gaudi/server/integration-tests/test_model.py
+++ b/backends/gaudi/server/integration-tests/test_model.py
@ -9,8 +9,8 @@ TEST_CONFIGS = {
    "meta-llama/Llama-3.1-8B-Instruct-shared": {
        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
        "input": "What is Deep Learning?",
-        "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use",
+        "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
-        "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use",
+        "expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
        "args": [
            "--sharded",
            "true",
@ -165,20 +165,6 @@ TEST_CONFIGS = {
            "4",
        ],
    },
    "facebook/opt-125m": {
        "model_id": "facebook/opt-125m",
        "input": "What is Deep Learning?",
        "expected_greedy_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout",
        "expected_batch_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout",
        "args": [
            "--max-input-tokens",
            "512",
            "--max-total-tokens",
            "1024",
            "--max-batch-size",
            "4",
        ],
    },
    "EleutherAI/gpt-j-6b": {
        "model_id": "EleutherAI/gpt-j-6b",
        "input": "What is Deep Learning?",
--- a/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
+++ b/backends/gaudi/server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py
@ -111,6 +111,7 @@ class MistralAttention(torch.nn.Module):
        )
        self.num_heads = config.num_attention_heads
        self.hidden_size = config.hidden_size
        if getattr(config, "head_dim", None) is not None:
            self.head_size = config.head_dim
        else: