Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)
Let's try non sharded gemma.
commit 1e03ea96d0 (parent: 15e178e3ad)
.github/workflows/build.yaml (vendored): 2 changed lines
@@ -78,7 +78,7 @@ jobs:
               # export runs_on="ubuntu-latest"
               export runs_on="aws-highmemory-32-plus-priv"
               export platform="cpu"
-              export extra_pytest="-k test_flash_llama_simple"
+              export extra_pytest="-k test_flash_gemma_simple"
               ;;
             esac
             echo $dockerfile
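The workflow change only swaps the pytest -k filter used by the CPU CI job, so the job now selects the renamed test_flash_gemma_simple test instead of test_flash_llama_simple. As a minimal sketch (not taken from the workflow itself), this is how a -k expression narrows a pytest run to tests whose names match; the "integration-tests" path and the programmatic invocation are illustrative assumptions:

# Minimal sketch: pytest -k selection by test name.
# The "integration-tests" path and this programmatic entry point are
# assumptions for illustration, not part of the workflow.
import sys

import pytest

if __name__ == "__main__":
    # Only tests whose node ids contain "test_flash_gemma_simple" are run;
    # every other collected test is deselected.
    sys.exit(pytest.main(["integration-tests", "-k", "test_flash_gemma_simple"]))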
@@ -16,7 +16,7 @@ async def flash_gemma(flash_gemma_handle):
 @pytest.mark.release
 @pytest.mark.asyncio
 @pytest.mark.private
-async def test_flash_gemma(flash_gemma, response_snapshot):
+async def test_flash_gemma_simple(flash_gemma, response_snapshot):
     response = await flash_gemma.generate(
         "Test request", max_new_tokens=10, decoder_input_details=True
     )
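The second hunk renames the test itself so the new -k filter matches it (the test file's name is not shown in this excerpt). Below is a hedged sketch of how the renamed test might sit in its module, assuming the repository's usual launcher/handle fixture pattern; the fixture bodies, the google/gemma-2b model id, num_shard=1 (hinted at by the "non sharded" commit title), and the final assertions are assumptions, since none of them appear in this diff:

# Sketch of the surrounding test module; fixture details and assertions
# are assumptions, not shown in the diff above.
import pytest


@pytest.fixture(scope="module")
def flash_gemma_handle(launcher):
    # Assumed fixture: launch the server without sharding, per the commit title.
    with launcher("google/gemma-2b", num_shard=1) as handle:
        yield handle


@pytest.fixture(scope="module")
async def flash_gemma(flash_gemma_handle):
    # Assumed fixture body: wait for the server to be healthy, return its client.
    await flash_gemma_handle.health(300)
    return flash_gemma_handle.client


@pytest.mark.release
@pytest.mark.asyncio
@pytest.mark.private
async def test_flash_gemma_simple(flash_gemma, response_snapshot):
    # Renamed test from the diff: generate 10 tokens for a fixed prompt and
    # compare the result against a stored response snapshot (assumed checks).
    response = await flash_gemma.generate(
        "Test request", max_new_tokens=10, decoder_input_details=True
    )

    assert response.details.generated_tokens == 10
    assert response == response_snapshot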