diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index c563fa27..bd76cb42 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -202,4 +202,4 @@ jobs: export EXTRA_PYTEST="${{ needs.build-and-push.outputs.extra_pytest }}" export HF_TOKEN=${{ secrets.HF_TOKEN }} echo $DOCKER_IMAGE - pytest -s -vv integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST} + pytest -s -vvx integration-tests ${PYTEST_FLAGS} ${EXTRA_PYTEST} diff --git a/integration-tests/models/test_bloom_560m.py b/integration-tests/models/test_bloom_560m.py index 07463904..c7859181 100644 --- a/integration-tests/models/test_bloom_560m.py +++ b/integration-tests/models/test_bloom_560m.py @@ -1,67 +1,67 @@ -# import pytest -# -# -# @pytest.fixture(scope="module") -# def bloom_560_handle(launcher): -# with launcher("bigscience/bloom-560m", num_shard=1) as handle: -# yield handle -# -# -# @pytest.fixture(scope="module") -# async def bloom_560(bloom_560_handle): -# await bloom_560_handle.health(240) -# return bloom_560_handle.client -# -# -# @pytest.mark.release -# @pytest.mark.asyncio -# async def test_bloom_560m(bloom_560, response_snapshot): -# response = await bloom_560.generate( -# "Pour déguster un ortolan, il faut tout d'abord", -# max_new_tokens=10, -# top_p=0.9, -# decoder_input_details=True, -# seed=0, -# ) -# -# assert response.details.generated_tokens == 10 -# assert response == response_snapshot -# -# -# @pytest.mark.release -# @pytest.mark.asyncio -# async def test_bloom_560m_all_params(bloom_560, response_snapshot): -# response = await bloom_560.generate( -# "Pour déguster un ortolan, il faut tout d'abord", -# max_new_tokens=10, -# repetition_penalty=1.2, -# return_full_text=True, -# stop_sequences=["test"], -# temperature=0.5, -# top_p=0.9, -# top_k=10, -# truncate=5, -# typical_p=0.9, -# watermark=True, -# decoder_input_details=True, -# seed=0, -# ) -# -# assert response.details.generated_tokens == 10 -# assert response == response_snapshot -# -# -# @pytest.mark.release -# @pytest.mark.asyncio -# async def test_bloom_560m_load(bloom_560, generate_load, response_snapshot): -# responses = await generate_load( -# bloom_560, -# "Pour déguster un ortolan, il faut tout d'abord", -# max_new_tokens=10, -# n=4, -# ) -# -# assert len(responses) == 4 -# assert all([r.generated_text == responses[0].generated_text for r in responses]) -# -# assert responses == response_snapshot +import pytest + + +@pytest.fixture(scope="module") +def bloom_560_handle(launcher): + with launcher("bigscience/bloom-560m", num_shard=1) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def bloom_560(bloom_560_handle): + await bloom_560_handle.health(240) + return bloom_560_handle.client + + +@pytest.mark.release +@pytest.mark.asyncio +async def test_bloom_560m(bloom_560, response_snapshot): + response = await bloom_560.generate( + "Pour déguster un ortolan, il faut tout d'abord", + max_new_tokens=10, + top_p=0.9, + decoder_input_details=True, + seed=0, + ) + + assert response.details.generated_tokens == 10 + assert response == response_snapshot + + +@pytest.mark.release +@pytest.mark.asyncio +async def test_bloom_560m_all_params(bloom_560, response_snapshot): + response = await bloom_560.generate( + "Pour déguster un ortolan, il faut tout d'abord", + max_new_tokens=10, + repetition_penalty=1.2, + return_full_text=True, + stop_sequences=["test"], + temperature=0.5, + top_p=0.9, + top_k=10, + truncate=5, + typical_p=0.9, + watermark=True, + decoder_input_details=True, + seed=0, + ) + + assert response.details.generated_tokens == 10 + assert response == response_snapshot + + +@pytest.mark.release +@pytest.mark.asyncio +async def test_bloom_560m_load(bloom_560, generate_load, response_snapshot): + responses = await generate_load( + bloom_560, + "Pour déguster un ortolan, il faut tout d'abord", + max_new_tokens=10, + n=4, + ) + + assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) + + assert responses == response_snapshot