fix(ci): use smaller runner

This commit is contained in:
baptiste 2025-06-23 18:17:44 +00:00
parent ae7f3aeba1
commit b159d02744
2 changed files with 4 additions and 4 deletions

View File

@@ -129,7 +129,7 @@ jobs:
export label_extension="-gaudi"
export docker_volume="/mnt/cache"
export docker_devices=""
export runs_on="itac-bm-emr-gaudi3-dell-8gaudi"
export runs_on="itac-bm-emr-gaudi3-dell-2gaudi"
export platform=""
export extra_pytest="--gaudi"
export target=""

View File

@@ -12,7 +12,7 @@ def pytest_configure(config):
# The "args" values in TEST_CONFIGS are not optimized for speed but only check that the inference is working for the different models architectures.
TEST_CONFIGS = {
"meta-llama/Llama-3.1-8B-Instruct-shared": {
"meta-llama/Llama-3.1-8B-Instruct-sharded": {
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
"input": "What is Deep Learning?",
"expected_greedy_output": " A Beginners Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
@@ -21,13 +21,13 @@ TEST_CONFIGS = {
"--sharded",
"true",
"--num-shard",
"8",
"2",
"--max-input-tokens",
"512",
"--max-total-tokens",
"1024",
"--max-batch-size",
"8",
"4",
"--max-batch-prefill-tokens",
"2048",
],