diff --git a/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json index ef88926c..93724fe4 100644 --- a/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json +++ b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json @@ -11,22 +11,22 @@ }, { "id": 13, - "logprob": -2.734375, + "logprob": -2.59375, "text": "," }, { "id": 8862, - "logprob": -3.6875, + "logprob": -3.5625, "text": " yellow" }, { "id": 13, - "logprob": -0.40234375, + "logprob": -0.44726562, "text": "," }, { "id": 209, - "logprob": -8.25, + "logprob": -8.0, "text": " " } ], @@ -52,7 +52,7 @@ }, { "id": 9830, - "logprob": -2.25, + "logprob": -2.03125, "special": false, "text": " colors" }, @@ -64,13 +64,13 @@ }, { "id": 329, - "logprob": -2.171875, + "logprob": -2.734375, "special": false, "text": " A" }, { "id": 1180, - "logprob": -2.046875, + "logprob": -2.0, "special": false, "text": " number" }, @@ -81,19 +81,19 @@ "text": " of" }, { - "id": 1027, - "logprob": -1.5546875, + "id": 253, + "logprob": -0.69140625, "special": false, - "text": " different" + "text": " the" }, { "id": 3295, - "logprob": -0.97265625, + "logprob": -0.8203125, "special": false, "text": " color" } ], "top_tokens": null }, - "generated_text": "blue, red, yellow, \nand blue colors. A number of different color" + "generated_text": "blue, red, yellow, \nand blue colors. A number of the color" } diff --git a/integration-tests/models/test_flash_llama_exl2.py b/integration-tests/models/test_flash_llama_exl2.py index 7169c999..1ee67695 100644 --- a/integration-tests/models/test_flash_llama_exl2.py +++ b/integration-tests/models/test_flash_llama_exl2.py @@ -21,7 +21,7 @@ async def flash_llama_exl2(flash_llama_exl2_handle): return flash_llama_exl2_handle.client -@pytest.mark.release +# @pytest.mark.release @pytest.mark.asyncio @pytest.mark.private async def test_flash_llama_exl2(flash_llama_exl2, ignore_logprob_response_snapshot): @@ -33,7 +33,7 @@ async def test_flash_llama_exl2(flash_llama_exl2, ignore_logprob_response_snapsh assert response == ignore_logprob_response_snapshot -@pytest.mark.release +# @pytest.mark.release @pytest.mark.asyncio @pytest.mark.private async def test_flash_llama_exl2_all_params( @@ -60,7 +60,7 @@ async def test_flash_llama_exl2_all_params( assert response == ignore_logprob_response_snapshot -@pytest.mark.release +# @pytest.mark.release @pytest.mark.asyncio @pytest.mark.private async def test_flash_llama_exl2_load( diff --git a/integration-tests/models/test_flash_llama_fp8.py b/integration-tests/models/test_flash_llama_fp8.py index fe5df590..bc7458b7 100644 --- a/integration-tests/models/test_flash_llama_fp8.py +++ b/integration-tests/models/test_flash_llama_fp8.py @@ -13,7 +13,7 @@ async def flash_llama_fp8(flash_llama_fp8_handle): return flash_llama_fp8_handle.client -@pytest.mark.release +# @pytest.mark.release @pytest.mark.asyncio @pytest.mark.private async def test_flash_llama_fp8(flash_llama_fp8, response_snapshot): @@ -25,7 +25,7 @@ async def test_flash_llama_fp8(flash_llama_fp8, response_snapshot): assert response == response_snapshot -@pytest.mark.release +# @pytest.mark.release @pytest.mark.asyncio @pytest.mark.private async def test_flash_llama_fp8_all_params(flash_llama_fp8, response_snapshot): @@ -48,7 +48,7 @@ async def test_flash_llama_fp8_all_params(flash_llama_fp8, response_snapshot): assert response == response_snapshot -@pytest.mark.release +# @pytest.mark.release @pytest.mark.asyncio @pytest.mark.private async def test_flash_llama_fp8_load(flash_llama_fp8, generate_load, response_snapshot):