From a29893486e3af0b0041b380910e80bc96180947c Mon Sep 17 00:00:00 2001 From: Jason Cheng Date: Sat, 24 Feb 2024 15:42:56 +0800 Subject: [PATCH] Added test cases --- .../test_flash_qwen2/test_flash_qwen2.json | 84 +++++ .../test_flash_qwen2_all_params.json | 84 +++++ .../test_flash_qwen2_load.json | 338 ++++++++++++++++++ integration-tests/models/test_flash_qwen2.py | 61 ++++ 4 files changed, 567 insertions(+) create mode 100644 integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json create mode 100644 integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json create mode 100644 integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json create mode 100644 integration-tests/models/test_flash_qwen2.py diff --git a/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json new file mode 100644 index 00000000..2e3906ce --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2.json @@ -0,0 +1,84 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 2271, + "text": "Test", + "logprob": null + }, + { + "id": 1681, + "text": " request", + "logprob": -7.0351562 + } + ], + "seed": null, + "tokens": [ + { + "id": 369, + "text": " for", + "logprob": -2.1914062, + "special": false + }, + { + "id": 279, + "text": " the", + "logprob": -2.6210938, + "special": false + }, + { + "id": 2701, + "text": " following", + "logprob": -3.6445312, + "special": false + }, + { + "id": 729, + "text": " function", + "logprob": -2.9648438, + "special": false + }, + { + "id": 271, + "text": "\n\n", + "logprob": -1.9111328, + "special": false + }, + { + "id": 31946, + "text": "Inputs", + "logprob": -1.6855469, + "special": false + }, + { + "id": 25, + "text": ":", + "logprob": -1.6093254e-05, + "special": false + }, 
+ { + "id": 707, + "text": " def", + "logprob": -0.5678711, + "special": false + }, + { + "id": 1477, + "text": " find", + "logprob": -2.5917969, + "special": false + }, + { + "id": 6345, + "text": "_max", + "logprob": -1.8349609, + "special": false + } + ], + "top_tokens": null + }, + "generated_text": " for the following function\n\nInputs: def find_max" +} diff --git a/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json new file mode 100644 index 00000000..bdaab6f2 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_all_params.json @@ -0,0 +1,84 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 2271, + "text": "Test", + "logprob": null + }, + { + "id": 1681, + "text": " request", + "logprob": -7.0351562 + } + ], + "seed": 0, + "tokens": [ + { + "id": 311, + "text": " to", + "logprob": -1.4472656, + "special": false + }, + { + "id": 633, + "text": " get", + "logprob": -0.4741211, + "special": false + }, + { + "id": 264, + "text": " a", + "logprob": 0.0, + "special": false + }, + { + "id": 1140, + "text": " list", + "logprob": 0.0, + "special": false + }, + { + "id": 315, + "text": " of", + "logprob": 0.0, + "special": false + }, + { + "id": 678, + "text": " all", + "logprob": 0.0, + "special": false + }, + { + "id": 279, + "text": " the", + "logprob": -0.2590332, + "special": false + }, + { + "id": 3847, + "text": " users", + "logprob": -0.45239258, + "special": false + }, + { + "id": 304, + "text": " in", + "logprob": -0.12322998, + "special": false + }, + { + "id": 419, + "text": " this", + "logprob": -1.7275391, + "special": false + } + ], + "top_tokens": null + }, + "generated_text": "Test request to get a list of all the users in this" +} diff --git 
a/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json new file mode 100644 index 00000000..6f0b21e9 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_qwen2/test_flash_qwen2_load.json @@ -0,0 +1,338 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 2271, + "text": "Test", + "logprob": null + }, + { + "id": 1681, + "text": " request", + "logprob": -7.0351562 + } + ], + "seed": null, + "tokens": [ + { + "id": 369, + "text": " for", + "logprob": -2.1914062, + "special": false + }, + { + "id": 279, + "text": " the", + "logprob": -2.6210938, + "special": false + }, + { + "id": 2701, + "text": " following", + "logprob": -3.6445312, + "special": false + }, + { + "id": 729, + "text": " function", + "logprob": -2.9648438, + "special": false + }, + { + "id": 271, + "text": "\n\n", + "logprob": -1.9111328, + "special": false + }, + { + "id": 31946, + "text": "Inputs", + "logprob": -1.6855469, + "special": false + }, + { + "id": 25, + "text": ":", + "logprob": -1.6093254e-05, + "special": false + }, + { + "id": 707, + "text": " def", + "logprob": -0.5678711, + "special": false + }, + { + "id": 1477, + "text": " find", + "logprob": -2.5917969, + "special": false + }, + { + "id": 6345, + "text": "_max", + "logprob": -1.8349609, + "special": false + } + ], + "top_tokens": null + }, + "generated_text": " for the following function\n\nInputs: def find_max" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 2271, + "text": "Test", + "logprob": null + }, + { + "id": 1681, + "text": " request", + "logprob": -7.0351562 + } + ], + "seed": null, + "tokens": [ + { + "id": 369, + "text": " for", + "logprob": -2.1914062, + "special": false + }, + { + "id": 279, + "text": " the", + "logprob": 
-2.6210938, + "special": false + }, + { + "id": 2701, + "text": " following", + "logprob": -3.6445312, + "special": false + }, + { + "id": 729, + "text": " function", + "logprob": -2.9648438, + "special": false + }, + { + "id": 271, + "text": "\n\n", + "logprob": -1.9111328, + "special": false + }, + { + "id": 31946, + "text": "Inputs", + "logprob": -1.6855469, + "special": false + }, + { + "id": 25, + "text": ":", + "logprob": -1.6093254e-05, + "special": false + }, + { + "id": 707, + "text": " def", + "logprob": -0.5678711, + "special": false + }, + { + "id": 1477, + "text": " find", + "logprob": -2.5917969, + "special": false + }, + { + "id": 6345, + "text": "_max", + "logprob": -1.8349609, + "special": false + } + ], + "top_tokens": null + }, + "generated_text": " for the following function\n\nInputs: def find_max" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 2271, + "text": "Test", + "logprob": null + }, + { + "id": 1681, + "text": " request", + "logprob": -7.0351562 + } + ], + "seed": null, + "tokens": [ + { + "id": 369, + "text": " for", + "logprob": -2.1914062, + "special": false + }, + { + "id": 279, + "text": " the", + "logprob": -2.6210938, + "special": false + }, + { + "id": 2701, + "text": " following", + "logprob": -3.6445312, + "special": false + }, + { + "id": 729, + "text": " function", + "logprob": -2.9648438, + "special": false + }, + { + "id": 271, + "text": "\n\n", + "logprob": -1.9111328, + "special": false + }, + { + "id": 31946, + "text": "Inputs", + "logprob": -1.6855469, + "special": false + }, + { + "id": 25, + "text": ":", + "logprob": -1.6093254e-05, + "special": false + }, + { + "id": 707, + "text": " def", + "logprob": -0.5678711, + "special": false + }, + { + "id": 1477, + "text": " find", + "logprob": -2.5917969, + "special": false + }, + { + "id": 6345, + "text": "_max", + "logprob": -1.8349609, + "special": false + } + ], + "top_tokens": null 
+ }, + "generated_text": " for the following function\n\nInputs: def find_max" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 2271, + "text": "Test", + "logprob": null + }, + { + "id": 1681, + "text": " request", + "logprob": -7.0351562 + } + ], + "seed": null, + "tokens": [ + { + "id": 369, + "text": " for", + "logprob": -2.1914062, + "special": false + }, + { + "id": 279, + "text": " the", + "logprob": -2.6210938, + "special": false + }, + { + "id": 2701, + "text": " following", + "logprob": -3.6445312, + "special": false + }, + { + "id": 729, + "text": " function", + "logprob": -2.9648438, + "special": false + }, + { + "id": 271, + "text": "\n\n", + "logprob": -1.9111328, + "special": false + }, + { + "id": 31946, + "text": "Inputs", + "logprob": -1.6855469, + "special": false + }, + { + "id": 25, + "text": ":", + "logprob": -1.6093254e-05, + "special": false + }, + { + "id": 707, + "text": " def", + "logprob": -0.5678711, + "special": false + }, + { + "id": 1477, + "text": " find", + "logprob": -2.5917969, + "special": false + }, + { + "id": 6345, + "text": "_max", + "logprob": -1.8349609, + "special": false + } + ], + "top_tokens": null + }, + "generated_text": " for the following function\n\nInputs: def find_max" + } +] diff --git a/integration-tests/models/test_flash_qwen2.py b/integration-tests/models/test_flash_qwen2.py new file mode 100644 index 00000000..e07ed553 --- /dev/null +++ b/integration-tests/models/test_flash_qwen2.py @@ -0,0 +1,61 @@ +import pytest + + +@pytest.fixture(scope="module") +def flash_qwen2_handle(launcher): + with launcher("Qwen/Qwen1.5-7B") as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_qwen2(flash_qwen2_handle): + await flash_qwen2_handle.health(300) + return flash_qwen2_handle.client + + +@pytest.mark.asyncio +async def test_flash_qwen2(flash_qwen2, response_snapshot): + response = await flash_qwen2.generate( + 
"Test request", max_new_tokens=10, decoder_input_details=True + ) + + assert response.details.generated_tokens == 10 + assert response.generated_text == " for the following function\n\nInputs: def find_max" + assert response == response_snapshot + + +@pytest.mark.asyncio +async def test_flash_qwen2_all_params(flash_qwen2, response_snapshot): + response = await flash_qwen2.generate( + "Test request", + max_new_tokens=10, + repetition_penalty=1.2, + return_full_text=True, + stop_sequences=["test"], + temperature=0.5, + top_p=0.9, + top_k=10, + truncate=5, + typical_p=0.9, + watermark=True, + decoder_input_details=True, + seed=0, + ) + + assert response.details.generated_tokens == 10 + assert response == response_snapshot + + +@pytest.mark.asyncio +async def test_flash_qwen2_load(flash_qwen2, generate_load, response_snapshot): + responses = await generate_load( + flash_qwen2, "Test request", max_new_tokens=10, n=4 + ) + + assert len(responses) == 4 + assert all( + [r.generated_text == responses[0].generated_text for r in responses] + ), f"{[r.generated_text for r in responses]}" + assert responses[0].generated_text == " for the following function\n\nInputs: def find_max" + + assert responses == response_snapshot