diff --git a/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral.json b/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral.json new file mode 100644 index 00000000..4e7de9a6 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral.json @@ -0,0 +1,89 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 3735, + "logprob": -12.9140625, + "text": "Test" + }, + { + "id": 2159, + "logprob": -10.7578125, + "text": "request" + } + ], + "seed": null, + "tokens": [ + { + "id": 28747, + "logprob": -0.54785156, + "special": false, + "text": ":" + }, + { + "id": 3169, + "logprob": -1.4091797, + "special": false, + "text": " Let" + }, + { + "id": 307, + "logprob": -3.0273438, + "special": false, + "text": " n" + }, + { + "id": 327, + "logprob": -0.94433594, + "special": false, + "text": " =" + }, + { + "id": 28705, + "logprob": -0.81347656, + "special": false, + "text": " " + }, + { + "id": 28740, + "logprob": -1.2958984, + "special": false, + "text": "1" + }, + { + "id": 28734, + "logprob": -2.0644531, + "special": false, + "text": "0" + }, + { + "id": 387, + "logprob": -1.9580078, + "special": false, + "text": " -" + }, + { + "id": 28705, + "logprob": -0.5073242, + "special": false, + "text": " " + }, + { + "id": 28740, + "logprob": -1.1816406, + "special": false, + "text": "1" + } + ], + "top_tokens": null + }, + "generated_text": ": Let n = 10 - 1" +} diff --git a/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_all_params.json b/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_all_params.json new file mode 100644 index 00000000..c0dc6471 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_all_params.json @@ -0,0 +1,89 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 3735, + "logprob": -12.9140625, + "text": "Test" + }, + { + "id": 2159, + "logprob": -10.7578125, + "text": "request" + } + ], + "seed": 0, + "tokens": [ + { + "id": 28747, + "logprob": 0.0, + "special": false, + "text": ":" + }, + { + "id": 3169, + "logprob": -0.1307373, + "special": false, + "text": " Let" + }, + { + "id": 332, + "logprob": -2.3359375, + "special": false, + "text": " u" + }, + { + "id": 347, + "logprob": 0.0, + "special": false, + "text": " be" + }, + { + "id": 325, + "logprob": -1.0234375, + "special": false, + "text": " (" + }, + { + "id": 28734, + "logprob": -2.0292969, + "special": false, + "text": "0" + }, + { + "id": 648, + "logprob": -1.0439453, + "special": false, + "text": " +" + }, + { + "id": 28705, + "logprob": -0.24499512, + "special": false, + "text": " " + }, + { + "id": 28770, + "logprob": -0.5073242, + "special": false, + "text": "3" + }, + { + "id": 387, + "logprob": -1.5507812, + "special": false, + "text": " -" + } + ], + "top_tokens": null + }, + "generated_text": "Test request: Let u be (0 + 3 -" +} diff --git a/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_load.json b/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_load.json new file mode 100644 index 00000000..9d133077 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_mistral/test_flash_mistral_load.json @@ -0,0 +1,358 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 3735, + "logprob": -12.9140625, + "text": "Test" + }, + { + "id": 2159, + "logprob": -10.7578125, + "text": "request" + } + ], + "seed": null, + "tokens": [ + { + "id": 28747, + "logprob": -0.55078125, + "special": false, + "text": ":" + }, + { + "id": 3169, + "logprob": -1.4140625, + "special": false, + "text": " Let" + }, + { + "id": 307, + "logprob": -3.0273438, + "special": false, + "text": " n" + }, + { + "id": 327, + "logprob": -0.94140625, + "special": false, + "text": " =" + }, + { + "id": 28705, + "logprob": -0.8173828, + "special": false, + "text": " " + }, + { + "id": 28740, + "logprob": -1.2978516, + "special": false, + "text": "1" + }, + { + "id": 28734, + "logprob": -2.0664062, + "special": false, + "text": "0" + }, + { + "id": 387, + "logprob": -1.9560547, + "special": false, + "text": " -" + }, + { + "id": 28705, + "logprob": -0.5078125, + "special": false, + "text": " " + }, + { + "id": 28740, + "logprob": -1.1787109, + "special": false, + "text": "1" + } + ], + "top_tokens": null + }, + "generated_text": ": Let n = 10 - 1" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 3735, + "logprob": -12.9140625, + "text": "Test" + }, + { + "id": 2159, + "logprob": -10.7578125, + "text": "request" + } + ], + "seed": null, + "tokens": [ + { + "id": 28747, + "logprob": -0.54785156, + "special": false, + "text": ":" + }, + { + "id": 3169, + "logprob": -1.4111328, + "special": false, + "text": " Let" + }, + { + "id": 307, + "logprob": -3.0292969, + "special": false, + "text": " n" + }, + { + "id": 327, + "logprob": -0.94433594, + "special": false, + "text": " =" + }, + { + "id": 28705, + "logprob": -0.8178711, + "special": false, + "text": " " + }, + { + "id": 28740, + "logprob": -1.2939453, + "special": false, + "text": "1" + }, + { + "id": 28734, + "logprob": -2.0644531, + "special": false, + "text": "0" + }, + { + "id": 387, + "logprob": -1.9550781, + "special": false, + "text": " -" + }, + { + "id": 28705, + "logprob": -0.5078125, + "special": false, + "text": " " + }, + { + "id": 28740, + "logprob": -1.1796875, + "special": false, + "text": "1" + } + ], + "top_tokens": null + }, + "generated_text": ": Let n = 10 - 1" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 3735, + "logprob": -12.9140625, + "text": "Test" + }, + { + "id": 2159, + "logprob": -10.7578125, + "text": "request" + } + ], + "seed": null, + "tokens": [ + { + "id": 28747, + "logprob": -0.55078125, + "special": false, + "text": ":" + }, + { + "id": 3169, + "logprob": -1.4140625, + "special": false, + "text": " Let" + }, + { + "id": 307, + "logprob": -3.0273438, + "special": false, + "text": " n" + }, + { + "id": 327, + "logprob": -0.94140625, + "special": false, + "text": " =" + }, + { + "id": 28705, + "logprob": -0.8173828, + "special": false, + "text": " " + }, + { + "id": 28740, + "logprob": -1.2978516, + "special": false, + "text": "1" + }, + { + "id": 28734, + "logprob": -2.0664062, + "special": false, + "text": "0" + }, + { + "id": 387, + "logprob": -1.9560547, + "special": false, + "text": " -" + }, + { + "id": 28705, + "logprob": -0.5078125, + "special": false, + "text": " " + }, + { + "id": 28740, + "logprob": -1.1787109, + "special": false, + "text": "1" + } + ], + "top_tokens": null + }, + "generated_text": ": Let n = 10 - 1" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 3735, + "logprob": -12.9140625, + "text": "Test" + }, + { + "id": 2159, + "logprob": -10.7578125, + "text": "request" + } + ], + "seed": null, + "tokens": [ + { + "id": 28747, + "logprob": -0.55078125, + "special": false, + "text": ":" + }, + { + "id": 3169, + "logprob": -1.4140625, + "special": false, + "text": " Let" + }, + { + "id": 307, + "logprob": -3.0273438, + "special": false, + "text": " n" + }, + { + "id": 327, + "logprob": -0.94140625, + "special": false, + "text": " =" + }, + { + "id": 28705, + "logprob": -0.8173828, + "special": false, + "text": " " + }, + { + "id": 28740, + "logprob": -1.2978516, + "special": false, + "text": "1" + }, + { + "id": 28734, + "logprob": -2.0664062, + "special": false, + "text": "0" + }, + { + "id": 387, + "logprob": -1.9560547, + "special": false, + "text": " -" + }, + { + "id": 28705, + "logprob": -0.5078125, + "special": false, + "text": " " + }, + { + "id": 28740, + "logprob": -1.1787109, + "special": false, + "text": "1" + } + ], + "top_tokens": null + }, + "generated_text": ": Let n = 10 - 1" + } +] diff --git a/integration-tests/models/test_flash_mistral.py b/integration-tests/models/test_flash_mistral.py index 0a1c76ba..63cb09b5 100644 --- a/integration-tests/models/test_flash_mistral.py +++ b/integration-tests/models/test_flash_mistral.py @@ -13,7 +13,6 @@ async def flash_mistral(flash_mistral_handle): return flash_mistral_handle.client -@pytest.mark.skip @pytest.mark.asyncio @pytest.mark.private async def test_flash_mistral(flash_mistral, response_snapshot): @@ -25,7 +24,6 @@ async def test_flash_mistral(flash_mistral, response_snapshot): assert response == response_snapshot -@pytest.mark.skip @pytest.mark.asyncio @pytest.mark.private async def test_flash_mistral_all_params(flash_mistral, response_snapshot): @@ -45,11 +43,10 @@ async def test_flash_mistral_all_params(flash_mistral, response_snapshot): seed=0, ) - assert response.details.generated_tokens == 5 + assert response.details.generated_tokens == 10 assert response == response_snapshot -@pytest.mark.skip @pytest.mark.asyncio @pytest.mark.private async def test_flash_mistral_load(flash_mistral, generate_load, response_snapshot):