diff --git a/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json index dd8936af..a4f7269c 100644 --- a/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json +++ b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json @@ -11,47 +11,47 @@ }, { "id": 49833, - "logprob": -10.5390625, + "logprob": -10.5859375, "text": " dég" }, { "id": 21543, - "logprob": -0.14758301, + "logprob": -0.14794922, "text": "uster" }, { "id": 447, - "logprob": -1.9296875, + "logprob": -1.9277344, "text": " un" }, { "id": 46341, - "logprob": -15.4453125, + "logprob": -15.3203125, "text": " ort" }, { "id": 35567, - "logprob": -7.59375, + "logprob": -7.5664062, "text": "olan" }, { "id": 15, - "logprob": -1.3994141, + "logprob": -1.3974609, "text": "," }, { "id": 1669, - "logprob": -1.578125, + "logprob": -1.5351562, "text": " il" }, { "id": 11580, - "logprob": -0.9453125, + "logprob": -0.9423828, "text": " faut" }, { "id": 3913, - "logprob": -3.7011719, + "logprob": -3.671875, "text": " tout" }, { @@ -60,69 +60,70 @@ "text": " d'abord" } ], - "seed": 0, + "seed": 1, "tokens": [ { - "id": 578, - "logprob": -1.6474609, + "id": 2997, + "logprob": -4.4101562, "special": false, - "text": " le" + "text": " vous" }, { - "id": 5608, - "logprob": -2.5097656, + "id": 71256, + "logprob": -5.421875, "special": false, - "text": " faire" + "text": " retrouver" }, { - "id": 159570, - "logprob": -6.65625, + "id": 693, + "logprob": -2.1738281, "special": false, - "text": " réch" + "text": " à" }, { - "id": 810, - "logprob": 0.0, + "id": 366, + "logprob": -1.5322266, "special": false, - "text": "au" + "text": " la" }, { - "id": 12736, - "logprob": 0.0, + "id": 221398, + "logprob": -2.6640625, "special": false, - "text": "ffer" + "text": " terrasse" }, { - "id": 1742, - "logprob": -2.5859375, + "id": 1375, + "logprob": -4.375, "special": false, - "text": " au" + "text": " pour" }, { - "id": 6105, - "logprob": -2.03125, + "id": 86887, + "logprob": -4.8203125, "special": false, - "text": " bain" + "text": " essayer" }, { - "id": 88254, - "logprob": -0.12695312, + "id": 2155, + "logprob": -2.7421875, "special": false, - "text": "-mar" + "text": " ce" }, { - "id": 641, - "logprob": 0.0, + "id": 5743, + "logprob": -3.1757812, "special": false, - "text": "ie" + "text": " jus" }, { - "id": 2940, - "logprob": -3.5175781, + "id": 1479, + "logprob": -3.3554688, "special": false, - "text": " avec" + "text": " qui" } - ] + ], + "top_tokens": null }, - "generated_text": " le faire réchauffer au bain-marie avec" + "generated_text": " vous retrouver à la terrasse pour essayer ce jus qui" } diff --git a/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_all_params.json b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_all_params.json index 8253dc96..0286e3da 100644 --- a/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_all_params.json +++ b/integration-tests/models/__snapshots__/test_flash_gemma/test_flash_gemma_all_params.json @@ -20,7 +20,7 @@ "text": " request" } ], - "seed": 0, + "seed": 1, "tokens": [ { "id": 7539, @@ -35,20 +35,20 @@ "text": " are" }, { - "id": 671, - "logprob": -1.703125, + "id": 476, + "logprob": -0.453125, "special": false, - "text": " an" + "text": " a" }, { - "id": 8727, - "logprob": 0.0, + "id": 2621, + "logprob": -2.078125, "special": false, - "text": " essential" + "text": " key" }, { "id": 1702, - "logprob": 0.0, + "logprob": -0.20117188, "special": false, "text": " part" }, @@ -64,26 +64,26 @@ "special": false, "text": " the" }, - { - "id": 11859, - "logprob": -1.6953125, - "special": false, - "text": " lab" - }, { "id": 2185, - "logprob": -1.3125, + "logprob": -0.765625, "special": false, "text": " process" }, { - "id": 578, - "logprob": -1.5, + "id": 604, + "logprob": 0.0, "special": false, - "text": " and" + "text": " for" + }, + { + "id": 17583, + "logprob": -2.609375, + "special": false, + "text": " sending" } ], "top_tokens": null }, - "generated_text": "Test request forms are an essential part of the lab process and" + "generated_text": "Test request forms are a key part of the process for sending" } diff --git a/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json index 221ff13d..ac0ecab6 100644 --- a/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json +++ b/integration-tests/models/__snapshots__/test_flash_phi/test_flash_phi_all_params.json @@ -1,8 +1,8 @@ { "details": { "best_of_sequences": null, - "finish_reason": "stop_sequence", - "generated_tokens": 6, + "finish_reason": "length", + "generated_tokens": 10, "prefill": [ { "id": 14402, @@ -15,7 +15,7 @@ "text": " request" } ], - "seed": 0, + "seed": 1, "tokens": [ { "id": 284, @@ -24,37 +24,61 @@ "text": " to" }, { - "id": 3758, - "logprob": -0.62597656, + "id": 262, + "logprob": -2.0957031, "special": false, - "text": " send" + "text": " the" }, { - "id": 1366, - "logprob": -0.87060547, + "id": 7824, + "logprob": 0.0, "special": false, - "text": " data" + "text": " API" }, { - "id": 625, - "logprob": -0.88427734, + "id": 198, + "logprob": -1.4794922, "special": false, - "text": " over" + "text": "\n" }, { - "id": 257, - "logprob": -1.0830078, + "id": 50280, + "logprob": 0.0, "special": false, - "text": " a" + "text": " " }, { - "id": 3127, - "logprob": -1.9462891, + "id": 37811, + "logprob": 0.0, "special": false, - "text": " network" + "text": "\"\"\"" + }, + { + "id": 628, + "logprob": 0.0, + "special": false, + "text": "\n\n" + }, + { + "id": 50284, + "logprob": 0.0, + "special": false, + "text": " " + }, + { + "id": 4299, + "logprob": 0.0, + "special": false, + "text": "def" + }, + { + "id": 1332, + "logprob": -1.7158203, + "special": false, + "text": " test" } ], "top_tokens": null }, - "generated_text": "Test request to send data over a network" + "generated_text": "Test request to the API\n \"\"\"\n\n def test" } diff --git a/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json index 85e9a9e0..079bb13a 100644 --- a/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json +++ b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json @@ -30,7 +30,7 @@ "text": " " } ], - "seed": 0, + "seed": 1, "tokens": [ { "id": 187, @@ -39,22 +39,22 @@ "text": "\n" }, { - "id": 395, - "logprob": -0.3125, + "id": 11863, + "logprob": -1.3125, "special": false, - "text": "and" + "text": "blue" }, { - "id": 4797, + "id": 285, + "logprob": -1.1015625, + "special": false, + "text": " and" + }, + { + "id": 4759, "logprob": 0.0, "special": false, - "text": " blue" - }, - { - "id": 9830, - "logprob": -1.65625, - "special": false, - "text": " colors" + "text": " green" }, { "id": 15, @@ -63,16 +63,16 @@ "text": "." }, { - "id": 329, - "logprob": -2.4375, + "id": 380, + "logprob": -1.65625, "special": false, - "text": " A" + "text": " The" }, { - "id": 1180, - "logprob": -1.953125, + "id": 3295, + "logprob": -0.45117188, "special": false, - "text": " number" + "text": " color" }, { "id": 273, @@ -81,19 +81,19 @@ "text": " of" }, { - "id": 1027, - "logprob": -1.5546875, + "id": 253, + "logprob": 0.0, "special": false, - "text": " different" + "text": " the" }, { - "id": 3295, - "logprob": -0.97265625, + "id": 2329, + "logprob": -3.125, "special": false, - "text": " color" + "text": " air" } ], "top_tokens": null }, - "generated_text": "blue, red, yellow, \nand blue colors. A number of different color" + "generated_text": "blue, red, yellow, \nblue and green. The color of the air" } diff --git a/integration-tests/models/test_flash_gemma.py b/integration-tests/models/test_flash_gemma.py index 89d1b2e3..d167b813 100644 --- a/integration-tests/models/test_flash_gemma.py +++ b/integration-tests/models/test_flash_gemma.py @@ -3,7 +3,7 @@ import pytest @pytest.fixture(scope="module") def flash_gemma_handle(launcher): - with launcher("gg-hf/gemma-2b", num_shard=1) as handle: + with launcher("google/gemma-2b", num_shard=1) as handle: yield handle @@ -13,7 +13,7 @@ async def flash_gemma(flash_gemma_handle): return flash_gemma_handle.client -# @pytest.mark.skip +@pytest.mark.skip @pytest.mark.asyncio @pytest.mark.private async def test_flash_gemma(flash_gemma, response_snapshot): @@ -25,7 +25,7 @@ async def test_flash_gemma(flash_gemma, response_snapshot): assert response == response_snapshot -# @pytest.mark.skip +@pytest.mark.skip @pytest.mark.asyncio @pytest.mark.private async def test_flash_gemma_all_params(flash_gemma, response_snapshot): diff --git a/integration-tests/models/test_flash_phi.py b/integration-tests/models/test_flash_phi.py index 679e1ac8..2dbc5f04 100644 --- a/integration-tests/models/test_flash_phi.py +++ b/integration-tests/models/test_flash_phi.py @@ -42,8 +42,11 @@ async def test_flash_phi_all_params(flash_phi, response_snapshot): seed=1, ) - assert response.details.generated_tokens == 6 - assert response.generated_text == "Test request to send data over a network" + assert response.details.generated_tokens == 10 + assert ( + response.generated_text + == 'Test request to the API\n """\n\n def test' + ) assert response == response_snapshot diff --git a/integration-tests/models/test_mamba.py b/integration-tests/models/test_mamba.py index f8a8b00f..e50a75ff 100644 --- a/integration-tests/models/test_mamba.py +++ b/integration-tests/models/test_mamba.py @@ -45,7 +45,7 @@ async def test_mamba_all_params(fused_kernel_mamba, response_snapshot): assert response.details.generated_tokens == 10 assert ( response.generated_text - == "blue, red, yellow, \nand blue colors. A number of different color" + == "blue, red, yellow, \nblue and green. The color of the air" ) assert response == response_snapshot diff --git a/integration-tests/models/test_mt0_base.py b/integration-tests/models/test_mt0_base.py index 3a51be69..2ace787d 100644 --- a/integration-tests/models/test_mt0_base.py +++ b/integration-tests/models/test_mt0_base.py @@ -23,7 +23,7 @@ async def test_mt0_base(mt0_base, response_snapshot): seed=1, ) - assert response.details.generated_tokens == 5 + assert response.details.generated_tokens == 7 assert response == response_snapshot