fix(ci): use smaller runner

2025-06-24 01:50:17 +00:00 · 2025-06-23 18:17:44 +00:00 · 2025-06-23 18:17:44 +00:00 · b159d02744
commit b159d02744
parent ae7f3aeba1
2 changed files with 4 additions and 4 deletions
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -129,7 +129,7 @@ jobs:
                export label_extension="-gaudi"
                export docker_volume="/mnt/cache"
                export docker_devices=""
-                export runs_on="itac-bm-emr-gaudi3-dell-8gaudi"
+                export runs_on="itac-bm-emr-gaudi3-dell-2gaudi"
                export platform=""
                export extra_pytest="--gaudi"
                export target=""
--- a/integration-tests/gaudi/test_gaudi_generate.py
+++ b/integration-tests/gaudi/test_gaudi_generate.py
@@ -12,7 +12,7 @@ def pytest_configure(config):

 # The "args" values in TEST_CONFIGS are not optimized for speed but only check that the inference is working for the different models architectures.
 TEST_CONFIGS = {
-    "meta-llama/Llama-3.1-8B-Instruct-shared": {
+    "meta-llama/Llama-3.1-8B-Instruct-sharded": {
        "model_id": "meta-llama/Llama-3.1-8B-Instruct",
        "input": "What is Deep Learning?",
        "expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
@@ -21,13 +21,13 @@ TEST_CONFIGS = {
            "--sharded",
            "true",
            "--num-shard",
-            "8",
+            "2",
            "--max-input-tokens",
            "512",
            "--max-total-tokens",
            "1024",
            "--max-batch-size",
-            "8",
+            "4",
            "--max-batch-prefill-tokens",
            "2048",
        ],