mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-06-19 15:52:08 +00:00
[Gaudi] Fix the integration-test issues (#3265)
Signed-off-by: yuanwu <yuan.wu@intel.com>
This commit is contained in:
parent
ded4cb52ac
commit
3752143b39
@ -50,6 +50,7 @@ local-dev-install: install-dependencies
|
|||||||
|
|
||||||
# In order to run the integration tests, you need to first build the image (make -C backends/gaudi image)
|
# In order to run the integration tests, you need to first build the image (make -C backends/gaudi image)
|
||||||
run-integration-tests:
|
run-integration-tests:
|
||||||
|
pip install -U pip uv
|
||||||
uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt
|
uv pip install -r ${root_dir}/backends/gaudi/server/integration-tests/requirements.txt
|
||||||
DOCKER_VOLUME=${root_dir}/data \
|
DOCKER_VOLUME=${root_dir}/data \
|
||||||
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
|
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
|
||||||
@ -57,6 +58,7 @@ run-integration-tests:
|
|||||||
|
|
||||||
# This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests
|
# This is used to capture the expected outputs for the integration tests offering an easy way to add more models to the integration tests
|
||||||
capture-expected-outputs-for-integration-tests:
|
capture-expected-outputs-for-integration-tests:
|
||||||
|
pip install -U pip uv
|
||||||
DOCKER_VOLUME=${root_dir}/data \
|
DOCKER_VOLUME=${root_dir}/data \
|
||||||
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
|
HF_TOKEN=`cat ${HOME}/.cache/huggingface/token` \
|
||||||
uv run pytest --durations=0 -sv ${root_dir}/backends/gaudi/server/integration-tests/capture_expected_outputs.py
|
uv run pytest --durations=0 -sv ${root_dir}/backends/gaudi/server/integration-tests/capture_expected_outputs.py
|
||||||
|
@ -9,8 +9,8 @@ TEST_CONFIGS = {
|
|||||||
"meta-llama/Llama-3.1-8B-Instruct-shared": {
|
"meta-llama/Llama-3.1-8B-Instruct-shared": {
|
||||||
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
"model_id": "meta-llama/Llama-3.1-8B-Instruct",
|
||||||
"input": "What is Deep Learning?",
|
"input": "What is Deep Learning?",
|
||||||
"expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use",
|
"expected_greedy_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
|
||||||
"expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use",
|
"expected_batch_output": " A Beginner’s Guide\nDeep learning is a subset of machine learning that involves the use of artificial neural networks to analyze and interpret data. It is a type of",
|
||||||
"args": [
|
"args": [
|
||||||
"--sharded",
|
"--sharded",
|
||||||
"true",
|
"true",
|
||||||
@ -165,20 +165,6 @@ TEST_CONFIGS = {
|
|||||||
"4",
|
"4",
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
"facebook/opt-125m": {
|
|
||||||
"model_id": "facebook/opt-125m",
|
|
||||||
"input": "What is Deep Learning?",
|
|
||||||
"expected_greedy_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout",
|
|
||||||
"expected_batch_output": "\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout the Author\n\nAbout",
|
|
||||||
"args": [
|
|
||||||
"--max-input-tokens",
|
|
||||||
"512",
|
|
||||||
"--max-total-tokens",
|
|
||||||
"1024",
|
|
||||||
"--max-batch-size",
|
|
||||||
"4",
|
|
||||||
],
|
|
||||||
},
|
|
||||||
"EleutherAI/gpt-j-6b": {
|
"EleutherAI/gpt-j-6b": {
|
||||||
"model_id": "EleutherAI/gpt-j-6b",
|
"model_id": "EleutherAI/gpt-j-6b",
|
||||||
"input": "What is Deep Learning?",
|
"input": "What is Deep Learning?",
|
||||||
|
@ -111,6 +111,7 @@ class MistralAttention(torch.nn.Module):
|
|||||||
)
|
)
|
||||||
self.num_heads = config.num_attention_heads
|
self.num_heads = config.num_attention_heads
|
||||||
self.hidden_size = config.hidden_size
|
self.hidden_size = config.hidden_size
|
||||||
|
|
||||||
if getattr(config, "head_dim", None) is not None:
|
if getattr(config, "head_dim", None) is not None:
|
||||||
self.head_size = config.head_dim
|
self.head_size = config.head_dim
|
||||||
else:
|
else:
|
||||||
|
Loading…
Reference in New Issue
Block a user