Add @pytest.mark.skip to nine async integration tests in
test_flash_grammar_llama.py, test_grammar_llama.py and test_tools_llama.py.

NOTE(review): the markers are bare (no reason= argument) and this patch does
not record why the tests are disabled. Consider
@pytest.mark.skip(reason="...") so the skip is explained in the test report.

diff --git a/integration-tests/models/test_flash_grammar_llama.py b/integration-tests/models/test_flash_grammar_llama.py
index 585d0656..ce1cf787 100644
--- a/integration-tests/models/test_flash_grammar_llama.py
+++ b/integration-tests/models/test_flash_grammar_llama.py
@@ -28,6 +28,7 @@ async def test_flash_llama_grammar(flash_llama_grammar, response_snapshot):
     assert response == response_snapshot
 
 
+@pytest.mark.skip
 @pytest.mark.asyncio
 async def test_flash_llama_grammar_regex(flash_llama_grammar, response_snapshot):
     response = await flash_llama_grammar.generate(
@@ -46,6 +47,7 @@ async def test_flash_llama_grammar_regex(flash_llama_grammar, response_snapshot)
     assert response == response_snapshot
 
 
+@pytest.mark.skip
 @pytest.mark.asyncio
 async def test_flash_llama_grammar_json(flash_llama_grammar, response_snapshot):
     response = await flash_llama_grammar.generate(
@@ -94,6 +96,7 @@ async def test_flash_llama_grammar_json(flash_llama_grammar, response_snapshot):
     assert response == response_snapshot
 
 
+@pytest.mark.skip
 @pytest.mark.asyncio
 async def test_flash_llama_grammar_load(
     flash_llama_grammar, generate_load, response_snapshot
@@ -125,6 +128,7 @@ async def test_flash_llama_grammar_load(
 
 # this is the same as the above test, but only fires off a single request
 # this is only to ensure that the parallel and single inference produce the same result
+@pytest.mark.skip
 @pytest.mark.asyncio
 async def test_flash_llama_grammar_single_load_instance(
     flash_llama_grammar, generate_load, response_snapshot
diff --git a/integration-tests/models/test_grammar_llama.py b/integration-tests/models/test_grammar_llama.py
index da46352f..ce5da8a9 100644
--- a/integration-tests/models/test_grammar_llama.py
+++ b/integration-tests/models/test_grammar_llama.py
@@ -21,6 +21,7 @@ async def non_flash_llama_grammar(non_flash_llama_grammar_handle):
     return non_flash_llama_grammar_handle.client
 
 
+@pytest.mark.skip
 @pytest.mark.asyncio
 async def test_non_flash_llama_grammar_json(non_flash_llama_grammar, response_snapshot):
     response = await non_flash_llama_grammar.generate(
diff --git a/integration-tests/models/test_tools_llama.py b/integration-tests/models/test_tools_llama.py
index 38570c38..21bcbb52 100644
--- a/integration-tests/models/test_tools_llama.py
+++ b/integration-tests/models/test_tools_llama.py
@@ -98,6 +98,7 @@ async def test_flash_llama_grammar_no_tools(
     assert response == response_snapshot
 
 
+@pytest.mark.skip
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_snapshot):
@@ -134,6 +135,7 @@ async def test_flash_llama_grammar_tools(flash_llama_grammar_tools, response_sna
     assert response == response_snapshot
 
 
+@pytest.mark.skip
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_grammar_tools_auto(
@@ -173,6 +175,7 @@ async def test_flash_llama_grammar_tools_auto(
     assert response == response_snapshot
 
 
+@pytest.mark.skip
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_grammar_tools_choice(
@@ -208,6 +211,7 @@ async def test_flash_llama_grammar_tools_choice(
     assert response == response_snapshot
 
 
+@pytest.mark.skip
 @pytest.mark.asyncio
 @pytest.mark.private
 async def test_flash_llama_grammar_tools_stream(