From 3dbdf63ec547e4119a2573af5d819e9f9f1eeef8 Mon Sep 17 00:00:00 2001
From: Nicolas Patry <patry.nicolas@protonmail.com>
Date: Thu, 10 Oct 2024 16:51:57 +0200
Subject: [PATCH] Intel ci (#2630)

* Intel CI ?

* Let's try non-sharded gemma.

* Snapshot rename

* Apparently the container can already be gone.
---
 .github/workflows/build.yaml                                | 6 +++---
 integration-tests/conftest.py                               | 5 ++++-
 .../{test_flash_gemma.json => test_flash_gemma_simple.json} | 0
 .../{test_flash_llama.json => test_flash_llama_simple.json} | 0
 integration-tests/models/test_flash_gemma.py                | 2 +-
 integration-tests/models/test_flash_llama.py                | 2 +-
 6 files changed, 9 insertions(+), 6 deletions(-)
 rename integration-tests/models/__snapshots__/test_flash_gemma/{test_flash_gemma.json => test_flash_gemma_simple.json} (100%)
 rename integration-tests/models/__snapshots__/test_flash_llama/{test_flash_llama.json => test_flash_llama_simple.json} (100%)

diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
index d0aaea27..c563fa27 100644
--- a/.github/workflows/build.yaml
+++ b/.github/workflows/build.yaml
@@ -75,10 +75,10 @@ jobs:
                 export label_extension="-intel-cpu"
                 export docker_devices="none"
                 export docker_volume="/mnt/cache"
-                export runs_on="ubuntu-latest"
-                # export runs_on="aws-highmemory-32-plus-priv"
+                # export runs_on="ubuntu-latest"
+                export runs_on="aws-highmemory-32-plus-priv"
                 export platform="cpu"
-                export extra_pytest="-k test_flash_llama_load"
+                export extra_pytest="-k test_flash_gemma_simple"
                 ;;
           esac
           echo $dockerfile
diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py
index dbe69244..f24fc079 100644
--- a/integration-tests/conftest.py
+++ b/integration-tests/conftest.py
@@ -572,7 +572,10 @@ def launcher(event_loop):
             print(container_output, file=sys.stderr)
 
         finally:
-            container.remove()
+            try:
+                container.remove()
+            except Exception:
+                pass
 
     if DOCKER_IMAGE is not None:
         return docker_launcher
diff --git a/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_simple.json
similarity index 100%
rename from integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma.json
rename to integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_simple.json
diff --git a/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama.json b/integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_simple.json
similarity index 100%
rename from integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama.json
rename to integration-tests/models/__snapshots__/test_flash_llama/test_flash_llama_simple.json
diff --git a/integration-tests/models/test_flash_gemma.py b/integration-tests/models/test_flash_gemma.py
index 7bee8dea..4bd7bd14 100644
--- a/integration-tests/models/test_flash_gemma.py
+++ b/integration-tests/models/test_flash_gemma.py
@@ -16,7 +16,7 @@ async def flash_gemma(flash_gemma_handle):
 @pytest.mark.release
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_gemma(flash_gemma, response_snapshot):
+async def test_flash_gemma_simple(flash_gemma, response_snapshot):
     response = await flash_gemma.generate(
         "Test request", max_new_tokens=10, decoder_input_details=True
     )
diff --git a/integration-tests/models/test_flash_llama.py b/integration-tests/models/test_flash_llama.py
index c69314ff..bf49dc0b 100644
--- a/integration-tests/models/test_flash_llama.py
+++ b/integration-tests/models/test_flash_llama.py
@@ -15,7 +15,7 @@ async def flash_llama(flash_llama_handle):
 
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_llama(flash_llama, response_snapshot):
+async def test_flash_llama_simple(flash_llama, response_snapshot):
     response = await flash_llama.generate(
         "Test request", max_new_tokens=10, decoder_input_details=True
     )