diff --git a/integration-tests/models/__snapshots__/test_fused_kernel_mamba/test_fused_kernel_mamba.json b/integration-tests/models/__snapshots__/test_fused_kernel_mamba/test_fused_kernel_mamba.json deleted file mode 100644 index d75c959f..00000000 --- a/integration-tests/models/__snapshots__/test_fused_kernel_mamba/test_fused_kernel_mamba.json +++ /dev/null @@ -1,84 +0,0 @@ -{ - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 5089, - "logprob": null, - "text": "Test" - }, - { - "id": 2748, - "logprob": -9.7265625, - "text": " request" - } - ], - "seed": null, - "tokens": [ - { - "id": 187, - "logprob": -2.4746094, - "special": false, - "text": "\n" - }, - { - "id": 50274, - "logprob": -1.7861328, - "special": false, - "text": " " - }, - { - "id": 92, - "logprob": -2.03125, - "special": false, - "text": "{" - }, - { - "id": 187, - "logprob": -0.048706055, - "special": false, - "text": "\n" - }, - { - "id": 50270, - "logprob": -0.19421387, - "special": false, - "text": " " - }, - { - "id": 3, - "logprob": -1.5097656, - "special": false, - "text": "\"" - }, - { - "id": 9629, - "logprob": -2.7792969, - "special": false, - "text": "request" - }, - { - "id": 1381, - "logprob": -0.78271484, - "special": false, - "text": "\":" - }, - { - "id": 551, - "logprob": -0.49487305, - "special": false, - "text": " {" - }, - { - "id": 187, - "logprob": -0.021087646, - "special": false, - "text": "\n" - } - ], - "top_tokens": null - }, - "generated_text": "\n {\n \"request\": {\n" -} diff --git a/integration-tests/models/__snapshots__/test_mamba/test_mamba.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba.json new file mode 100644 index 00000000..4435f215 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_mamba/test_mamba.json @@ -0,0 +1,73 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "seed": null, + "tokens": [ + { + "id": 187, + "logprob": -0.3552246, + "special": false, + "text": "\n" + }, + { + "id": 187, + "logprob": -0.38378906, + "special": false, + "text": "\n" + }, + { + "id": 30763, + "logprob": -1.140625, + "special": false, + "text": "Deep" + }, + { + "id": 4715, + "logprob": -0.5551758, + "special": false, + "text": " learning" + }, + { + "id": 310, + "logprob": -0.59033203, + "special": false, + "text": " is" + }, + { + "id": 247, + "logprob": -0.70654297, + "special": false, + "text": " a" + }, + { + "id": 747, + "logprob": -2.0410156, + "special": false, + "text": " new" + }, + { + "id": 1511, + "logprob": -2.3789062, + "special": false, + "text": " type" + }, + { + "id": 273, + "logprob": -0.0026435852, + "special": false, + "text": " of" + }, + { + "id": 5145, + "logprob": -1.2841797, + "special": false, + "text": " machine" + } + ], + "top_tokens": null + }, + "generated_text": "\n\nDeep learning is a new type of machine" +} diff --git a/integration-tests/models/__snapshots__/test_fused_kernel_mamba/test_fused_kernel_mamba_all_params.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json similarity index 100% rename from integration-tests/models/__snapshots__/test_fused_kernel_mamba/test_fused_kernel_mamba_all_params.json rename to integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json diff --git a/integration-tests/models/__snapshots__/test_fused_kernel_mamba/test_fused_kernel_mamba_load.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json similarity index 87% rename from integration-tests/models/__snapshots__/test_fused_kernel_mamba/test_fused_kernel_mamba_load.json rename to integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json index 830b9f59..014210b2 100644 --- a/integration-tests/models/__snapshots__/test_fused_kernel_mamba/test_fused_kernel_mamba_load.json +++ b/integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json @@ -41,136 +41,37 @@ }, { "id": 187, - "logprob": -0.38989258, + "logprob": -0.38378906, "special": false, "text": "\n" }, { "id": 30763, - "logprob": -1.1386719, + "logprob": -1.1279297, "special": false, "text": "Deep" }, { "id": 4715, - "logprob": -0.5576172, + "logprob": -0.5595703, "special": false, "text": " learning" }, { "id": 310, - "logprob": -0.5913086, + "logprob": -0.60253906, "special": false, "text": " is" }, { "id": 247, - "logprob": -0.69970703, + "logprob": -0.7050781, "special": false, "text": " a" }, { "id": 747, - "logprob": -2.0449219, - "special": false, - "text": " new" - }, - { - "id": 1511, - "logprob": -2.3847656, - "special": false, - "text": " type" - }, - { - "id": 273, - "logprob": -0.0026626587, - "special": false, - "text": " of" - }, - { - "id": 5145, - "logprob": -1.2841797, - "special": false, - "text": " machine" - } - ], - "top_tokens": null - }, - "generated_text": "\n\nDeep learning is a new type of machine" - }, - { - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 1276, - "logprob": null, - "text": "What" - }, - { - "id": 310, - "logprob": -0.78027344, - "text": " is" - }, - { - "id": 18147, - "logprob": -12.8203125, - "text": " Deep" - }, - { - "id": 20727, - "logprob": -2.9902344, - "text": " Learning" - }, - { - "id": 32, - "logprob": -1.1523438, - "text": "?" - } - ], - "seed": null, - "tokens": [ - { - "id": 187, - "logprob": -0.35351562, - "special": false, - "text": "\n" - }, - { - "id": 187, - "logprob": -0.38476562, - "special": false, - "text": "\n" - }, - { - "id": 30763, - "logprob": -1.1308594, - "special": false, - "text": "Deep" - }, - { - "id": 4715, - "logprob": -0.5522461, - "special": false, - "text": " learning" - }, - { - "id": 310, - "logprob": -0.59375, - "special": false, - "text": " is" - }, - { - "id": 247, - "logprob": -0.7036133, - "special": false, - "text": " a" - }, - { - "id": 747, - "logprob": -2.0507812, + "logprob": -2.0488281, "special": false, "text": " new" }, @@ -182,7 +83,7 @@ }, { "id": 273, - "logprob": -0.002664566, + "logprob": -0.0026416779, "special": false, "text": " of" }, @@ -239,49 +140,49 @@ }, { "id": 187, - "logprob": -0.38476562, + "logprob": -0.38256836, "special": false, "text": "\n" }, { "id": 30763, - "logprob": -1.1308594, + "logprob": -1.1269531, "special": false, "text": "Deep" }, { "id": 4715, - "logprob": -0.5522461, + "logprob": -0.54541016, "special": false, "text": " learning" }, { "id": 310, - "logprob": -0.59375, + "logprob": -0.59765625, "special": false, "text": " is" }, { "id": 247, - "logprob": -0.7036133, + "logprob": -0.7001953, "special": false, "text": " a" }, { "id": 747, - "logprob": -2.0507812, + "logprob": -2.0585938, "special": false, "text": " new" }, { "id": 1511, - "logprob": -2.3808594, + "logprob": -2.3789062, "special": false, "text": " type" }, { "id": 273, - "logprob": -0.002664566, + "logprob": -0.0027446747, "special": false, "text": " of" }, @@ -338,49 +239,148 @@ }, { "id": 187, - "logprob": -0.38476562, + "logprob": -0.38256836, "special": false, "text": "\n" }, { "id": 30763, - "logprob": -1.1308594, + "logprob": -1.1269531, "special": false, "text": "Deep" }, { "id": 4715, - "logprob": -0.5522461, + "logprob": -0.54541016, "special": false, "text": " learning" }, { "id": 310, - "logprob": -0.59375, + "logprob": -0.59765625, "special": false, "text": " is" }, { "id": 247, - "logprob": -0.7036133, + "logprob": -0.7001953, "special": false, "text": " a" }, { "id": 747, - "logprob": -2.0507812, + "logprob": -2.0585938, "special": false, "text": " new" }, { "id": 1511, - "logprob": -2.3808594, + "logprob": -2.3789062, "special": false, "text": " type" }, { "id": 273, - "logprob": -0.002664566, + "logprob": -0.0027446747, + "special": false, + "text": " of" + }, + { + "id": 5145, + "logprob": -1.2851562, + "special": false, + "text": " machine" + } + ], + "top_tokens": null + }, + "generated_text": "\n\nDeep learning is a new type of machine" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -0.78027344, + "text": " is" + }, + { + "id": 18147, + "logprob": -12.8203125, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -2.9902344, + "text": " Learning" + }, + { + "id": 32, + "logprob": -1.1523438, + "text": "?" + } + ], + "seed": null, + "tokens": [ + { + "id": 187, + "logprob": -0.35351562, + "special": false, + "text": "\n" + }, + { + "id": 187, + "logprob": -0.38256836, + "special": false, + "text": "\n" + }, + { + "id": 30763, + "logprob": -1.1269531, + "special": false, + "text": "Deep" + }, + { + "id": 4715, + "logprob": -0.54541016, + "special": false, + "text": " learning" + }, + { + "id": 310, + "logprob": -0.59765625, + "special": false, + "text": " is" + }, + { + "id": 247, + "logprob": -0.7001953, + "special": false, + "text": " a" + }, + { + "id": 747, + "logprob": -2.0585938, + "special": false, + "text": " new" + }, + { + "id": 1511, + "logprob": -2.3789062, + "special": false, + "text": " type" + }, + { + "id": 273, + "logprob": -0.0027446747, "special": false, "text": " of" }, diff --git a/integration-tests/models/test_fused_kernel_mamba.py b/integration-tests/models/test_mamba.py similarity index 87% rename from integration-tests/models/test_fused_kernel_mamba.py rename to integration-tests/models/test_mamba.py index 9bd0052f..d86faeff 100644 --- a/integration-tests/models/test_fused_kernel_mamba.py +++ b/integration-tests/models/test_mamba.py @@ -15,7 +15,7 @@ async def fused_kernel_mamba(fused_kernel_mamba_handle): @pytest.mark.asyncio @pytest.mark.private -async def test_fused_kernel_mamba(fused_kernel_mamba, response_snapshot): +async def test_mamba(fused_kernel_mamba, response_snapshot): response = await fused_kernel_mamba.generate( "What is Deep Learning?", max_new_tokens=10 ) @@ -24,10 +24,9 @@ async def test_fused_kernel_mamba(fused_kernel_mamba, response_snapshot): assert response.generated_text == "\n\nDeep learning is a new type of machine" assert response == response_snapshot - @pytest.mark.asyncio @pytest.mark.private -async def test_fused_kernel_mamba_all_params(fused_kernel_mamba, response_snapshot): +async def test_mamba_all_params(fused_kernel_mamba, response_snapshot): response = await fused_kernel_mamba.generate( "blue, red, yellow, ", max_new_tokens=10, @@ -50,7 +49,7 @@ async def test_fused_kernel_mamba_all_params(fused_kernel_mamba, response_snapsh @pytest.mark.asyncio @pytest.mark.private -async def test_fused_kernel_mamba_load(fused_kernel_mamba, generate_load, response_snapshot): +async def test_mamba_load(fused_kernel_mamba, generate_load, response_snapshot): responses = await generate_load(fused_kernel_mamba, "What is Deep Learning?", max_new_tokens=10, n=4) assert len(responses) == 4