diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 14c69a2b..901ec931 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -129,7 +129,7 @@ jobs:
             export label_extension="-gaudi"
             export docker_volume="/mnt/cache"
             export docker_devices=""
-            export runs_on="itac-bm-emr-gaudi3-dell-8gaudi"
+            export runs_on="itac-bm-emr-gaudi3-dell-2gaudi"
             export platform=""
             export extra_pytest="--gaudi"
             export target=""
diff --git a/integration-tests/gaudi/test_gaudi_generate.py b/integration-tests/gaudi/test_gaudi_generate.py
index 2b8b0c76..1316a48c 100644
--- a/integration-tests/gaudi/test_gaudi_generate.py
+++ b/integration-tests/gaudi/test_gaudi_generate.py
@@ -12,7 +12,7 @@ def pytest_configure(config):
 
 # The "args" values in TEST_CONFIGS are not optimized for speed but only check that the inference is working for the different models architectures.
 TEST_CONFIGS = {
-    "meta-llama/Llama-3.1-8B-Instruct-shared": {
+    "meta-llama/Llama-3.1-8B-Instruct-sharded": {
         "model_id": "meta-llama/Llama-3.1-8B-Instruct",
         "input": "What is Deep Learning?",
         "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
@@ -21,13 +21,13 @@ TEST_CONFIGS = {
             "--sharded",
             "true",
             "--num-shard",
-            "8",
+            "2",
             "--max-input-tokens",
             "512",
             "--max-total-tokens",
             "1024",
             "--max-batch-size",
-            "8",
+            "4",
             "--max-batch-prefill-tokens",
             "2048",
         ],