diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index 14c69a2b..901ec931 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -129,7 +129,7 @@ jobs:
             export label_extension="-gaudi"
             export docker_volume="/mnt/cache"
             export docker_devices=""
-            export runs_on="itac-bm-emr-gaudi3-dell-8gaudi"
+            export runs_on="itac-bm-emr-gaudi3-dell-2gaudi"
             export platform=""
             export extra_pytest="--gaudi"
             export target=""
diff --git a/integration-tests/gaudi/test_gaudi_generate.py b/integration-tests/gaudi/test_gaudi_generate.py
index 2b8b0c76..1316a48c 100644
--- a/integration-tests/gaudi/test_gaudi_generate.py
+++ b/integration-tests/gaudi/test_gaudi_generate.py
@@ -12,7 +12,7 @@ def pytest_configure(config):
 
 # The "args" values in TEST_CONFIGS are not optimized for speed but only check that the inference is working for the different models architectures.
 TEST_CONFIGS = {
-    "meta-llama/Llama-3.1-8B-Instruct-shared": {
+    "meta-llama/Llama-3.1-8B-Instruct-sharded": {
         "model_id": "meta-llama/Llama-3.1-8B-Instruct",
         "input": "What is Deep Learning?",
         "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
@@ -21,13 +21,13 @@ TEST_CONFIGS = {
             "--sharded",
             "true",
             "--num-shard",
-            "8",
+            "2",
             "--max-input-tokens",
             "512",
             "--max-total-tokens",
             "1024",
             "--max-batch-size",
-            "8",
+            "4",
             "--max-batch-prefill-tokens",
             "2048",
         ],