diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq.json b/integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq.json new file mode 100644 index 00000000..9ca22e10 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq.json @@ -0,0 +1,104 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1824, + "logprob": -12.296875, + "text": "What" + }, + { + "id": 349, + "logprob": -0.97216797, + "text": "is" + }, + { + "id": 3534, + "logprob": -10.1796875, + "text": "deep" + }, + { + "id": 5168, + "logprob": -0.9658203, + "text": "learning" + }, + { + "id": 28804, + "logprob": -0.44384766, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.50878906, + "special": false, + "text": "\n" + }, + { + "id": 13, + "logprob": -0.8876953, + "special": false, + "text": "\n" + }, + { + "id": 23229, + "logprob": -0.15124512, + "special": false, + "text": "Deep" + }, + { + "id": 5168, + "logprob": -0.030288696, + "special": false, + "text": " learning" + }, + { + "id": 349, + "logprob": -0.16687012, + "special": false, + "text": " is" + }, + { + "id": 264, + "logprob": -0.17858887, + "special": false, + "text": " a" + }, + { + "id": 19804, + "logprob": -0.8046875, + "special": false, + "text": " subset" + }, + { + "id": 302, + "logprob": -0.007205963, + "special": false, + "text": " of" + }, + { + "id": 5599, + "logprob": -0.090026855, + "special": false, + "text": " machine" + }, + { + "id": 5168, + "logprob": -0.0030670166, + "special": false, + "text": " learning" + } + ], + "top_tokens": null + }, + "generated_text": "\n\nDeep learning is a subset of machine learning" +} diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq_all_params.json b/integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq_all_params.json new file mode 100644 index 00000000..38ab7263 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq_all_params.json @@ -0,0 +1,99 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 349, + "logprob": -13.921875, + "text": "is" + }, + { + "id": 3534, + "logprob": -11.2265625, + "text": "deep" + }, + { + "id": 5168, + "logprob": -2.3886719, + "text": "learning" + }, + { + "id": 28804, + "logprob": -4.7109375, + "text": "?" + } + ], + "seed": 0, + "tokens": [ + { + "id": 13, + "logprob": 0.0, + "special": false, + "text": "\n" + }, + { + "id": 23229, + "logprob": -0.5229492, + "special": false, + "text": "Deep" + }, + { + "id": 17504, + "logprob": 0.0, + "special": false, + "text": " Learning" + }, + { + "id": 349, + "logprob": -0.5151367, + "special": false, + "text": " is" + }, + { + "id": 264, + "logprob": 0.0, + "special": false, + "text": " a" + }, + { + "id": 19804, + "logprob": 0.0, + "special": false, + "text": " subset" + }, + { + "id": 302, + "logprob": 0.0, + "special": false, + "text": " of" + }, + { + "id": 13253, + "logprob": -1.3359375, + "special": false, + "text": " Machine" + }, + { + "id": 17504, + "logprob": 0.0, + "special": false, + "text": " Learning" + }, + { + "id": 28725, + "logprob": 0.0, + "special": false, + "text": "," + } + ], + "top_tokens": null + }, + "generated_text": "What is deep learning?\nDeep Learning is a subset of Machine Learning," +} diff --git a/integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq_load.json b/integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq_load.json new file mode 100644 index 00000000..329d73ee --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_mixtral_awq/test_flash_mixtral_awq_load.json @@ -0,0 +1,418 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1824, + "logprob": -12.296875, + "text": "What" + }, + { + "id": 349, + "logprob": -0.97216797, + "text": "is" + }, + { + "id": 3534, + "logprob": -10.1796875, + "text": "deep" + }, + { + "id": 5168, + "logprob": -0.9658203, + "text": "learning" + }, + { + "id": 28804, + "logprob": -0.44384766, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.50878906, + "special": false, + "text": "\n" + }, + { + "id": 13, + "logprob": -0.8876953, + "special": false, + "text": "\n" + }, + { + "id": 23229, + "logprob": -0.15136719, + "special": false, + "text": "Deep" + }, + { + "id": 5168, + "logprob": -0.030273438, + "special": false, + "text": " learning" + }, + { + "id": 349, + "logprob": -0.1665039, + "special": false, + "text": " is" + }, + { + "id": 264, + "logprob": -0.1776123, + "special": false, + "text": " a" + }, + { + "id": 19804, + "logprob": -0.8076172, + "special": false, + "text": " subset" + }, + { + "id": 302, + "logprob": -0.007183075, + "special": false, + "text": " of" + }, + { + "id": 5599, + "logprob": -0.090148926, + "special": false, + "text": " machine" + }, + { + "id": 5168, + "logprob": -0.0030670166, + "special": false, + "text": " learning" + } + ], + "top_tokens": null + }, + "generated_text": "\n\nDeep learning is a subset of machine learning" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1824, + "logprob": -12.34375, + "text": "What" + }, + { + "id": 349, + "logprob": -0.96728516, + "text": "is" + }, + { + "id": 3534, + "logprob": -10.1796875, + "text": "deep" + }, + { + "id": 5168, + "logprob": -0.97265625, + "text": "learning" + }, + { + "id": 28804, + "logprob": -0.44189453, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.51220703, + "special": false, + "text": "\n" + }, + { + "id": 13, + "logprob": -0.87402344, + "special": false, + "text": "\n" + }, + { + "id": 23229, + "logprob": -0.15039062, + "special": false, + "text": "Deep" + }, + { + "id": 5168, + "logprob": -0.030288696, + "special": false, + "text": " learning" + }, + { + "id": 349, + "logprob": -0.1652832, + "special": false, + "text": " is" + }, + { + "id": 264, + "logprob": -0.17858887, + "special": false, + "text": " a" + }, + { + "id": 19804, + "logprob": -0.81103516, + "special": false, + "text": " subset" + }, + { + "id": 302, + "logprob": -0.007183075, + "special": false, + "text": " of" + }, + { + "id": 5599, + "logprob": -0.08880615, + "special": false, + "text": " machine" + }, + { + "id": 5168, + "logprob": -0.0030612946, + "special": false, + "text": " learning" + } + ], + "top_tokens": null + }, + "generated_text": "\n\nDeep learning is a subset of machine learning" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1824, + "logprob": -12.34375, + "text": "What" + }, + { + "id": 349, + "logprob": -0.96728516, + "text": "is" + }, + { + "id": 3534, + "logprob": -10.1796875, + "text": "deep" + }, + { + "id": 5168, + "logprob": -0.97265625, + "text": "learning" + }, + { + "id": 28804, + "logprob": -0.44189453, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.51220703, + "special": false, + "text": "\n" + }, + { + "id": 13, + "logprob": -0.87402344, + "special": false, + "text": "\n" + }, + { + "id": 23229, + "logprob": -0.15039062, + "special": false, + "text": "Deep" + }, + { + "id": 5168, + "logprob": -0.030288696, + "special": false, + "text": " learning" + }, + { + "id": 349, + "logprob": -0.1652832, + "special": false, + "text": " is" + }, + { + "id": 264, + "logprob": -0.17858887, + "special": false, + "text": " a" + }, + { + "id": 19804, + "logprob": -0.81103516, + "special": false, + "text": " subset" + }, + { + "id": 302, + "logprob": -0.007183075, + "special": false, + "text": " of" + }, + { + "id": 5599, + "logprob": -0.08880615, + "special": false, + "text": " machine" + }, + { + "id": 5168, + "logprob": -0.0030612946, + "special": false, + "text": " learning" + } + ], + "top_tokens": null + }, + "generated_text": "\n\nDeep learning is a subset of machine learning" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 1824, + "logprob": -12.34375, + "text": "What" + }, + { + "id": 349, + "logprob": -0.96728516, + "text": "is" + }, + { + "id": 3534, + "logprob": -10.1796875, + "text": "deep" + }, + { + "id": 5168, + "logprob": -0.97265625, + "text": "learning" + }, + { + "id": 28804, + "logprob": -0.44189453, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 13, + "logprob": -0.51220703, + "special": false, + "text": "\n" + }, + { + "id": 13, + "logprob": -0.87402344, + "special": false, + "text": "\n" + }, + { + "id": 23229, + "logprob": -0.15039062, + "special": false, + "text": "Deep" + }, + { + "id": 5168, + "logprob": -0.030288696, + "special": false, + "text": " learning" + }, + { + "id": 349, + "logprob": -0.1652832, + "special": false, + "text": " is" + }, + { + "id": 264, + "logprob": -0.17858887, + "special": false, + "text": " a" + }, + { + "id": 19804, + "logprob": -0.81103516, + "special": false, + "text": " subset" + }, + { + "id": 302, + "logprob": -0.007183075, + "special": false, + "text": " of" + }, + { + "id": 5599, + "logprob": -0.08880615, + "special": false, + "text": " machine" + }, + { + "id": 5168, + "logprob": -0.0030612946, + "special": false, + "text": " learning" + } + ], + "top_tokens": null + }, + "generated_text": "\n\nDeep learning is a subset of machine learning" + } +] diff --git a/integration-tests/models/test_flash_mixtral_awq.py b/integration-tests/models/test_flash_mixtral_awq.py new file mode 100644 index 00000000..ab1e0f00 --- /dev/null +++ b/integration-tests/models/test_flash_mixtral_awq.py @@ -0,0 +1,73 @@ +import pytest + + +@pytest.fixture(scope="module") +def flash_mixtral_awq_handle(launcher): + with launcher("casperhansen/mixtral-instruct-awq", num_shard=2) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_mixtral_awq(flash_mixtral_awq_handle): + await flash_mixtral_awq_handle.health(300) + return flash_mixtral_awq_handle.client + + +@pytest.mark.asyncio +async def test_flash_mixtral_awq(flash_mixtral_awq, response_snapshot): + response = await flash_mixtral_awq.generate( + "What is deep learning?", max_new_tokens=10, decoder_input_details=True + ) + + assert response.details.generated_tokens == 10 + assert ( + response.generated_text == "\n\nDeep learning is a subset of machine learning" + ) + assert response == response_snapshot + + +@pytest.mark.asyncio +async def test_flash_mixtral_awq_all_params(flash_mixtral_awq, response_snapshot): + response = await flash_mixtral_awq.generate( + "What is deep learning?", + max_new_tokens=10, + repetition_penalty=1.2, + return_full_text=True, + stop_sequences=["test"], + temperature=0.5, + top_p=0.9, + top_k=10, + truncate=5, + typical_p=0.9, + watermark=True, + decoder_input_details=True, + seed=0, + ) + + assert response.details.generated_tokens == 10 + assert ( + response.generated_text + == "What is deep learning?\nDeep Learning is a subset of Machine Learning," + ) + assert response == response_snapshot + + +@pytest.mark.asyncio +async def test_flash_mixtral_awq_load( + flash_mixtral_awq, generate_load, response_snapshot +): + responses = await generate_load( + flash_mixtral_awq, "What is deep learning?", max_new_tokens=10, n=4 + ) + + assert len(responses) == 4 + assert responses[0].details.generated_tokens == 10 + assert ( + responses[0].generated_text + == "\n\nDeep learning is a subset of machine learning" + ) + assert all( + [r.generated_text == responses[0].generated_text for r in responses] + ), f"{[r.generated_text for r in responses]}" + + assert responses == response_snapshot