diff --git a/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json b/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json index e39829ec..13c46f54 100644 --- a/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json +++ b/integration-tests/models/__snapshots__/test_flash_llama_fp8/test_flash_llama_fp8_all_params.json @@ -1,8 +1,8 @@ { "details": { "best_of_sequences": null, - "finish_reason": "stop_sequence", - "generated_tokens": 5, + "finish_reason": "length", + "generated_tokens": 10, "prefill": [ { "id": 128000, @@ -11,12 +11,12 @@ }, { "id": 2323, - "logprob": -9.5625, + "logprob": -9.5234375, "text": "Test" }, { "id": 1715, - "logprob": -10.4375, + "logprob": -10.421875, "text": " request" } ], @@ -24,36 +24,66 @@ "tokens": [ { "id": 25, - "logprob": -0.8984375, + "logprob": -0.88183594, "special": false, "text": ":" }, { - "id": 923, - "logprob": -2.84375, + "id": 2209, + "logprob": -2.6699219, "special": false, - "text": " add" + "text": " Is" }, { - "id": 264, - "logprob": 0.0, + "id": 279, + "logprob": -0.61083984, "special": false, - "text": " a" + "text": " the" + }, + { + "id": 734, + "logprob": -2.6660156, + "special": false, + "text": " function" }, { "id": 330, - "logprob": -0.31640625, + "logprob": -0.35498047, "special": false, "text": " \"" }, { - "id": 1985, - "logprob": 0.0, + "id": 4110, + "logprob": -2.4101562, "special": false, - "text": "test" + "text": "Create" + }, + { + "id": 7575, + "logprob": -2.2304688, + "special": false, + "text": "Process" + }, + { + "id": 1, + "logprob": -0.080078125, + "special": false, + "text": "\"" + }, + { + "id": 304, + "logprob": -0.75439453, + "special": false, + "text": " in" + }, + { + "id": 12468, + "logprob": -1.8769531, + "special": false, + "text": " Win" } ], "top_tokens": null }, - "generated_text": "Test request: add a \"test" + "generated_text": "Test request: Is the function \"CreateProcess\" in Win" } diff --git a/integration-tests/models/__snapshots__/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache_all_params.json b/integration-tests/models/__snapshots__/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache_all_params.json index 8bce3e10..f195f8f7 100644 --- a/integration-tests/models/__snapshots__/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache_all_params.json +++ b/integration-tests/models/__snapshots__/test_flash_llama_fp8_kv_cache/test_flash_llama_fp8_kv_cache_all_params.json @@ -16,17 +16,17 @@ }, { "id": 5655, - "logprob": -11.75, + "logprob": -11.8359375, "text": " deep" }, { "id": 6975, - "logprob": -2.0625, + "logprob": -2.0703125, "text": " learning" }, { "id": 30, - "logprob": -6.0, + "logprob": -5.9765625, "text": "?" } ], @@ -40,25 +40,25 @@ }, { "id": 34564, - "logprob": -0.11279297, + "logprob": -0.12512207, "special": false, "text": "Deep" }, { "id": 6975, - "logprob": -0.16015625, + "logprob": 0.0, "special": false, "text": " learning" }, { "id": 320, - "logprob": -0.25195312, + "logprob": -0.23840332, "special": false, "text": " (" }, { "id": 16931, - "logprob": -1.703125, + "logprob": -2.0175781, "special": false, "text": "DL" }, @@ -70,7 +70,7 @@ }, { "id": 374, - "logprob": -1.140625, + "logprob": -0.8613281, "special": false, "text": " is" }, @@ -82,7 +82,7 @@ }, { "id": 1207, - "logprob": -1.3125, + "logprob": -1.2451172, "special": false, "text": " sub" },