mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-24 01:50:17 +00:00
fix(ci): use smaller runner
This commit is contained in:
parent
ae7f3aeba1
commit
b159d02744
2
.github/workflows/build.yaml
vendored
2
.github/workflows/build.yaml
vendored
@ -129,7 +129,7 @@ jobs:
|
||||
export label_extension="-gaudi"
|
||||
export docker_volume="/mnt/cache"
|
||||
export docker_devices=""
|
||||
export runs_on="itac-bm-emr-gaudi3-dell-8gaudi"
|
||||
export runs_on="itac-bm-emr-gaudi3-dell-2gaudi"
|
||||
export platform=""
|
||||
export extra_pytest="--gaudi"
|
||||
export target=""
|
||||
|
@ -12,7 +12,7 @@ def pytest_configure(config):
|
||||
|
||||
# The "args" values in TEST_CONFIGS are not optimized for speed; they only check that inference works for the different model architectures.
|
||||
TEST_CONFIGS = {
|
||||
"meta-llama/Llama-3.1-8B-Instruct-shared": {
|
||||
"meta-llama/Llama-3.1-8B-Instruct-sharded": {
|
||||
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"input": "What is Deep Learning?",
|
||||
"expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
|
||||
@ -21,13 +21,13 @@ TEST_CONFIGS = {
|
||||
"--sharded",
|
||||
"true",
|
||||
"--num-shard",
|
||||
"8",
|
||||
"2",
|
||||
"--max-input-tokens",
|
||||
"512",
|
||||
"--max-total-tokens",
|
||||
"1024",
|
||||
"--max-batch-size",
|
||||
"8",
|
||||
"4",
|
||||
"--max-batch-prefill-tokens",
|
||||
"2048",
|
||||
],
|
||||
|
Loading…
Reference in New Issue
Block a user