From 15e178e3ad4ce0301881615a3694dd7c065e51ef Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 9 Oct 2024 16:08:03 +0200
Subject: [PATCH] Intel CI ?

---
 .github/workflows/build.yaml                 | 6 +++---
 integration-tests/models/test_flash_llama.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index d0aaea27..0f037457 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -75,10 +75,10 @@ jobs:
            export label_extension="-intel-cpu"
            export docker_devices="none"
            export docker_volume="/mnt/cache"
-           export runs_on="ubuntu-latest"
-           # export runs_on="aws-highmemory-32-plus-priv"
+           # export runs_on="ubuntu-latest"
+           export runs_on="aws-highmemory-32-plus-priv"
            export platform="cpu"
-           export extra_pytest="-k test_flash_llama_load"
+           export extra_pytest="-k test_flash_llama_simple"
            ;;
          esac
          echo $dockerfile
diff --git a/integration-tests/models/test_flash_llama.py b/integration-tests/models/test_flash_llama.py
index c69314ff..bf49dc0b 100644
--- a/integration-tests/models/test_flash_llama.py
+++ b/integration-tests/models/test_flash_llama.py
@@ -15,7 +15,7 @@ async def flash_llama(flash_llama_handle):
 
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_llama(flash_llama, response_snapshot):
+async def test_flash_llama_simple(flash_llama, response_snapshot):
     response = await flash_llama.generate(
         "Test request", max_new_tokens=10, decoder_input_details=True
     )