From a78b6fd1e89ae033ea4815a13511b8a9f23e1071 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Wed, 4 Dec 2024 21:34:46 +0100 Subject: [PATCH] Fixing a few tests. --- docs/source/reference/launcher.md | 10 + .../models/__snapshots__/test.py | 22 + .../test_bloom_560m/test_bloom_560m.json | 58 ++- .../test_bloom_560m_all_params.json | 28 +- .../test_bloom_560m/test_bloom_560m_load.json | 232 ++++++++++- .../test_bloom_560m_sharded.json | 58 ++- .../test_bloom_560m_sharded_load.json | 232 ++++++++++- .../test_flash_llama_awq_load_sharded.json | 132 +----- .../test_idefics/test_idefics.json | 98 ++++- .../test_idefics/test_idefics_load.json | 392 +++++++++++++++++- .../test_mamba/test_mamba_all_params.json | 28 +- .../test_mamba/test_mamba_load.json | 112 ++++- .../__snapshots__/test_mpt/test_mpt.json | 28 +- .../__snapshots__/test_mpt/test_mpt_load.json | 112 ++++- .../test_mt0_base/test_mt0_base.json | 8 +- .../test_mt0_base_all_params.json | 8 +- .../test_mt0_base/test_mt0_base_load.json | 32 +- .../test_t5_sharded/test_t5_sharded.json | 8 +- .../test_t5_sharded/test_t5_sharded_load.json | 32 +- launcher/src/main.rs | 13 +- 20 files changed, 1476 insertions(+), 167 deletions(-) create mode 100644 integration-tests/models/__snapshots__/test.py diff --git a/docs/source/reference/launcher.md b/docs/source/reference/launcher.md index 90246aa4..159b22e7 100644 --- a/docs/source/reference/launcher.md +++ b/docs/source/reference/launcher.md @@ -467,6 +467,16 @@ Options: [env: PAYLOAD_LIMIT=] [default: 2000000] +``` +## ENABLE_PREFILL_LOGPROBS +```shell + --enable-prefill-logprobs + Enables prefill logprobs + + Logprobs in the prompt are deactivated by default because they consume a large amount of VRAM (especially for long prompts). Using this flag reallows users to ask for them. + + [env: ENABLE_PREFILL_LOGPROBS=] + ``` ## HELP ```shell diff --git a/integration-tests/models/__snapshots__/test.py b/integration-tests/models/__snapshots__/test.py new file mode 100644 index 00000000..f6c9a6a9 --- /dev/null +++ b/integration-tests/models/__snapshots__/test.py @@ -0,0 +1,22 @@ +import os +import json + + +for root, dirs, files in os.walk("."): + for filename in files: + if filename.endswith(".json"): + with open(os.path.join(root, filename), "r") as f: + data = json.load(f) + + print(os.path.join(root, filename)) + try: + if filename.endswith("_load.json"): + for i in range(len(data)): + data[i]["details"]["prefill"] = [] + else: + data["details"]["prefill"] = [] + except Exception: + pass + + with open(os.path.join(root, filename), "w") as f: + json.dump(data, f, indent=2, ensure_ascii=False) diff --git a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json index ac276749..54c66408 100644 --- a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json +++ b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m.json @@ -3,7 +3,63 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.5703125, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14746094, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9277344, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.421875, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5820312, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4013672, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5595703, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.9428711, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.703125, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.7763672, + "text": " d'abord" + } + ], "seed": 0, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json index e538ba35..9422f27f 100644 --- a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json +++ b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_all_params.json @@ -3,7 +3,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 15, + "logprob": null, + "text": "," + }, + { + "id": 1669, + "logprob": -5.4453125, + "text": " il" + }, + { + "id": 11580, + "logprob": -2.3378906, + "text": " faut" + }, + { + "id": 3913, + "logprob": -4.3320312, + "text": " tout" + }, + { + "id": 39261, + "logprob": -2.9160156, + "text": " d'abord" + } + ], "seed": 0, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json index 59c62253..0a86bef8 100644 --- a/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json +++ b/integration-tests/models/__snapshots__/test_bloom_560m/test_bloom_560m_load.json @@ -4,7 +4,63 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.5625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14770508, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4609375, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5585938, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4003906, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5673828, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94628906, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.703125, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], "seed": null, "tokens": [ { @@ -76,7 +132,63 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.53125, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14770508, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4140625, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5234375, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.3613281, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5458984, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94189453, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.7011719, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], "seed": null, "tokens": [ { @@ -148,7 +260,63 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.53125, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14770508, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4140625, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5234375, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.3613281, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5458984, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94189453, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.7011719, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], "seed": null, "tokens": [ { @@ -220,7 +388,63 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.53125, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14770508, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4140625, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5234375, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.3613281, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5458984, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94189453, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.7011719, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], "seed": null, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json index 697edf6a..b17c889e 100644 --- a/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json +++ b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded.json @@ -3,7 +3,63 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.546875, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14819336, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9257812, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4296875, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.5625, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4199219, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5634766, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.9458008, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.6816406, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.7753906, + "text": " d'abord" + } + ], "seed": 0, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json index 1528e963..2dd480b9 100644 --- a/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json +++ b/integration-tests/models/__snapshots__/test_bloom_560m_sharded/test_bloom_560m_sharded_load.json @@ -4,7 +4,63 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.5390625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.14758301, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9296875, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.4453125, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.59375, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.3994141, + "text": "," + }, + { + "id": 1669, + "logprob": -1.578125, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.9453125, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.7011719, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.5732422, + "text": " d'abord" + } + ], "seed": null, "tokens": [ { @@ -76,7 +132,63 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.515625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.1484375, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.34375, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.515625, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4199219, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5664062, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94091797, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.6660156, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.7753906, + "text": " d'abord" + } + ], "seed": null, "tokens": [ { @@ -148,7 +260,63 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.515625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.1484375, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.34375, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.515625, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4199219, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5664062, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94091797, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.6660156, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.7753906, + "text": " d'abord" + } + ], "seed": null, "tokens": [ { @@ -220,7 +388,63 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 17934, + "logprob": null, + "text": "Pour" + }, + { + "id": 49833, + "logprob": -10.515625, + "text": " dég" + }, + { + "id": 21543, + "logprob": -0.1484375, + "text": "uster" + }, + { + "id": 447, + "logprob": -1.9287109, + "text": " un" + }, + { + "id": 46341, + "logprob": -15.34375, + "text": " ort" + }, + { + "id": 35567, + "logprob": -7.515625, + "text": "olan" + }, + { + "id": 15, + "logprob": -1.4199219, + "text": "," + }, + { + "id": 1669, + "logprob": -1.5664062, + "text": " il" + }, + { + "id": 11580, + "logprob": -0.94091797, + "text": " faut" + }, + { + "id": 3913, + "logprob": -3.6660156, + "text": " tout" + }, + { + "id": 39261, + "logprob": -1.7753906, + "text": " d'abord" + } + ], "seed": null, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json b/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json index f1d9129d..56a10a75 100644 --- a/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json +++ b/integration-tests/models/__snapshots__/test_flash_awq_sharded/test_flash_llama_awq_load_sharded.json @@ -4,38 +4,7 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [ - { - "id": 1, - "logprob": null, - "text": "" - }, - { - "id": 1724, - "logprob": -7.6914062, - "text": "What" - }, - { - "id": 338, - "logprob": -1.4746094, - "text": "is" - }, - { - "id": 21784, - "logprob": -9.390625, - "text": "Deep" - }, - { - "id": 29257, - "logprob": -1.8623047, - "text": "Learning" - }, - { - "id": 29973, - "logprob": -0.7558594, - "text": "?" - } - ], + "prefill": [], "seed": null, "tokens": [ { @@ -108,38 +77,7 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [ - { - "id": 1, - "logprob": null, - "text": "" - }, - { - "id": 1724, - "logprob": -7.6914062, - "text": "What" - }, - { - "id": 338, - "logprob": -1.4746094, - "text": "is" - }, - { - "id": 21784, - "logprob": -9.390625, - "text": "Deep" - }, - { - "id": 29257, - "logprob": -1.8623047, - "text": "Learning" - }, - { - "id": 29973, - "logprob": -0.7558594, - "text": "?" - } - ], + "prefill": [], "seed": null, "tokens": [ { @@ -212,38 +150,7 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [ - { - "id": 1, - "logprob": null, - "text": "" - }, - { - "id": 1724, - "logprob": -7.6914062, - "text": "What" - }, - { - "id": 338, - "logprob": -1.4746094, - "text": "is" - }, - { - "id": 21784, - "logprob": -9.390625, - "text": "Deep" - }, - { - "id": 29257, - "logprob": -1.8623047, - "text": "Learning" - }, - { - "id": 29973, - "logprob": -0.7558594, - "text": "?" - } - ], + "prefill": [], "seed": null, "tokens": [ { @@ -316,38 +223,7 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [ - { - "id": 1, - "logprob": null, - "text": "" - }, - { - "id": 1724, - "logprob": -7.6914062, - "text": "What" - }, - { - "id": 338, - "logprob": -1.4746094, - "text": "is" - }, - { - "id": 21784, - "logprob": -9.390625, - "text": "Deep" - }, - { - "id": 29257, - "logprob": -1.8623047, - "text": "Learning" - }, - { - "id": 29973, - "logprob": -0.7558594, - "text": "?" - } - ], + "prefill": [], "seed": null, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_idefics/test_idefics.json b/integration-tests/models/__snapshots__/test_idefics/test_idefics.json index 9774f84b..90fb6dcc 100644 --- a/integration-tests/models/__snapshots__/test_idefics/test_idefics.json +++ b/integration-tests/models/__snapshots__/test_idefics/test_idefics.json @@ -3,7 +3,103 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4911, + "logprob": -6.9765625, + "text": "User" + }, + { + "id": 29901, + "logprob": -0.0059432983, + "text": ":" + }, + { + "id": 32000, + "logprob": -0.8408203, + "text": "" + }, + { + "id": 32001, + "logprob": -9.906292e-05, + "text": "" + }, + { + "id": 32000, + "logprob": -2.3841858e-07, + "text": "" + }, + { + "id": 1815, + "logprob": -4.1679688, + "text": "Can" + }, + { + "id": 366, + "logprob": -0.014099121, + "text": "you" + }, + { + "id": 2649, + "logprob": -4.4609375, + "text": "tell" + }, + { + "id": 592, + "logprob": -0.29882812, + "text": "me" + }, + { + "id": 263, + "logprob": -4.1445312, + "text": "a" + }, + { + "id": 1407, + "logprob": -9.3828125, + "text": "very" + }, + { + "id": 3273, + "logprob": -1.9736328, + "text": "short" + }, + { + "id": 5828, + "logprob": -0.2800293, + "text": "story" + }, + { + "id": 2729, + "logprob": -3.5625, + "text": "based" + }, + { + "id": 373, + "logprob": -0.0006427765, + "text": "on" + }, + { + "id": 278, + "logprob": -0.13952637, + "text": "the" + }, + { + "id": 1967, + "logprob": -0.068115234, + "text": "image" + }, + { + "id": 29973, + "logprob": -0.16357422, + "text": "?" + } + ], "seed": null, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json b/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json index 0b10b285..21d6161b 100644 --- a/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json +++ b/integration-tests/models/__snapshots__/test_idefics/test_idefics_load.json @@ -4,7 +4,103 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4911, + "logprob": -6.9804688, + "text": "User" + }, + { + "id": 29901, + "logprob": -0.006122589, + "text": ":" + }, + { + "id": 32000, + "logprob": -0.8417969, + "text": "" + }, + { + "id": 32001, + "logprob": -9.918213e-05, + "text": "" + }, + { + "id": 32000, + "logprob": -2.3841858e-07, + "text": "" + }, + { + "id": 1815, + "logprob": -4.1679688, + "text": "Can" + }, + { + "id": 366, + "logprob": -0.014091492, + "text": "you" + }, + { + "id": 2649, + "logprob": -4.4726562, + "text": "tell" + }, + { + "id": 592, + "logprob": -0.2998047, + "text": "me" + }, + { + "id": 263, + "logprob": -4.15625, + "text": "a" + }, + { + "id": 1407, + "logprob": -9.3828125, + "text": "very" + }, + { + "id": 3273, + "logprob": -1.9716797, + "text": "short" + }, + { + "id": 5828, + "logprob": -0.27734375, + "text": "story" + }, + { + "id": 2729, + "logprob": -3.5605469, + "text": "based" + }, + { + "id": 373, + "logprob": -0.00064468384, + "text": "on" + }, + { + "id": 278, + "logprob": -0.14160156, + "text": "the" + }, + { + "id": 1967, + "logprob": -0.06915283, + "text": "image" + }, + { + "id": 29973, + "logprob": -0.16381836, + "text": "?" + } + ], "seed": null, "tokens": [ { @@ -76,7 +172,103 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4911, + "logprob": -6.9804688, + "text": "User" + }, + { + "id": 29901, + "logprob": -0.006122589, + "text": ":" + }, + { + "id": 32000, + "logprob": -0.8417969, + "text": "" + }, + { + "id": 32001, + "logprob": -9.942055e-05, + "text": "" + }, + { + "id": 32000, + "logprob": -2.3841858e-07, + "text": "" + }, + { + "id": 1815, + "logprob": -4.1679688, + "text": "Can" + }, + { + "id": 366, + "logprob": -0.014091492, + "text": "you" + }, + { + "id": 2649, + "logprob": -4.4726562, + "text": "tell" + }, + { + "id": 592, + "logprob": -0.2998047, + "text": "me" + }, + { + "id": 263, + "logprob": -4.15625, + "text": "a" + }, + { + "id": 1407, + "logprob": -9.3828125, + "text": "very" + }, + { + "id": 3273, + "logprob": -1.9716797, + "text": "short" + }, + { + "id": 5828, + "logprob": -0.27734375, + "text": "story" + }, + { + "id": 2729, + "logprob": -3.5605469, + "text": "based" + }, + { + "id": 373, + "logprob": -0.0006451607, + "text": "on" + }, + { + "id": 278, + "logprob": -0.14160156, + "text": "the" + }, + { + "id": 1967, + "logprob": -0.06915283, + "text": "image" + }, + { + "id": 29973, + "logprob": -0.16381836, + "text": "?" + } + ], "seed": null, "tokens": [ { @@ -148,7 +340,103 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4911, + "logprob": -6.9804688, + "text": "User" + }, + { + "id": 29901, + "logprob": -0.006122589, + "text": ":" + }, + { + "id": 32000, + "logprob": -0.8417969, + "text": "" + }, + { + "id": 32001, + "logprob": -9.918213e-05, + "text": "" + }, + { + "id": 32000, + "logprob": -2.3841858e-07, + "text": "" + }, + { + "id": 1815, + "logprob": -4.1679688, + "text": "Can" + }, + { + "id": 366, + "logprob": -0.014091492, + "text": "you" + }, + { + "id": 2649, + "logprob": -4.4726562, + "text": "tell" + }, + { + "id": 592, + "logprob": -0.2998047, + "text": "me" + }, + { + "id": 263, + "logprob": -4.15625, + "text": "a" + }, + { + "id": 1407, + "logprob": -9.3828125, + "text": "very" + }, + { + "id": 3273, + "logprob": -1.9716797, + "text": "short" + }, + { + "id": 5828, + "logprob": -0.27734375, + "text": "story" + }, + { + "id": 2729, + "logprob": -3.5605469, + "text": "based" + }, + { + "id": 373, + "logprob": -0.00064468384, + "text": "on" + }, + { + "id": 278, + "logprob": -0.14160156, + "text": "the" + }, + { + "id": 1967, + "logprob": -0.06915283, + "text": "image" + }, + { + "id": 29973, + "logprob": -0.16381836, + "text": "?" + } + ], "seed": null, "tokens": [ { @@ -220,7 +508,103 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 1, + "logprob": null, + "text": "" + }, + { + "id": 4911, + "logprob": -6.9804688, + "text": "User" + }, + { + "id": 29901, + "logprob": -0.006122589, + "text": ":" + }, + { + "id": 32000, + "logprob": -0.8417969, + "text": "" + }, + { + "id": 32001, + "logprob": -9.942055e-05, + "text": "" + }, + { + "id": 32000, + "logprob": -2.3841858e-07, + "text": "" + }, + { + "id": 1815, + "logprob": -4.1679688, + "text": "Can" + }, + { + "id": 366, + "logprob": -0.014091492, + "text": "you" + }, + { + "id": 2649, + "logprob": -4.4726562, + "text": "tell" + }, + { + "id": 592, + "logprob": -0.2998047, + "text": "me" + }, + { + "id": 263, + "logprob": -4.15625, + "text": "a" + }, + { + "id": 1407, + "logprob": -9.3828125, + "text": "very" + }, + { + "id": 3273, + "logprob": -1.9716797, + "text": "short" + }, + { + "id": 5828, + "logprob": -0.27734375, + "text": "story" + }, + { + "id": 2729, + "logprob": -3.5605469, + "text": "based" + }, + { + "id": 373, + "logprob": -0.0006451607, + "text": "on" + }, + { + "id": 278, + "logprob": -0.14160156, + "text": "the" + }, + { + "id": 1967, + "logprob": -0.06915283, + "text": "image" + }, + { + "id": 29973, + "logprob": -0.16381836, + "text": "?" + } + ], "seed": null, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json index 3895b48d..ef88926c 100644 --- a/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json +++ b/integration-tests/models/__snapshots__/test_mamba/test_mamba_all_params.json @@ -3,7 +3,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 2502, + "logprob": null, + "text": " red" + }, + { + "id": 13, + "logprob": -2.734375, + "text": "," + }, + { + "id": 8862, + "logprob": -3.6875, + "text": " yellow" + }, + { + "id": 13, + "logprob": -0.40234375, + "text": "," + }, + { + "id": 209, + "logprob": -8.25, + "text": " " + } + ], "seed": 0, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json b/integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json index 99edd6dd..4921c14b 100644 --- a/integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json +++ b/integration-tests/models/__snapshots__/test_mamba/test_mamba_load.json @@ -4,7 +4,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -0.83984375, + "text": " is" + }, + { + "id": 18147, + "logprob": -12.8125, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -2.84375, + "text": " Learning" + }, + { + "id": 32, + "logprob": -1.25, + "text": "?" + } + ], "seed": null, "tokens": [ { @@ -77,7 +103,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -0.80078125, + "text": " is" + }, + { + "id": 18147, + "logprob": -13.25, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -2.828125, + "text": " Learning" + }, + { + "id": 32, + "logprob": -1.1953125, + "text": "?" + } + ], "seed": null, "tokens": [ { @@ -150,7 +202,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -0.80078125, + "text": " is" + }, + { + "id": 18147, + "logprob": -13.25, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -2.828125, + "text": " Learning" + }, + { + "id": 32, + "logprob": -1.1953125, + "text": "?" + } + ], "seed": null, "tokens": [ { @@ -223,7 +301,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -0.80078125, + "text": " is" + }, + { + "id": 18147, + "logprob": -13.25, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -2.828125, + "text": " Learning" + }, + { + "id": 32, + "logprob": -1.1953125, + "text": "?" + } + ], "seed": null, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_mpt/test_mpt.json b/integration-tests/models/__snapshots__/test_mpt/test_mpt.json index ba8ee809..abbbf03c 100644 --- a/integration-tests/models/__snapshots__/test_mpt/test_mpt.json +++ b/integration-tests/models/__snapshots__/test_mpt/test_mpt.json @@ -3,7 +3,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 17, - "prefill": [], + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -1.5117188, + "text": " is" + }, + { + "id": 18147, + "logprob": -8.96875, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -1.953125, + "text": " Learning" + }, + { + "id": 32, + "logprob": -0.94189453, + "text": "?" + } + ], "seed": null, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_mpt/test_mpt_load.json b/integration-tests/models/__snapshots__/test_mpt/test_mpt_load.json index bb7b8846..e3bc57ed 100644 --- a/integration-tests/models/__snapshots__/test_mpt/test_mpt_load.json +++ b/integration-tests/models/__snapshots__/test_mpt/test_mpt_load.json @@ -4,7 +4,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 17, - "prefill": [], + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -1.5117188, + "text": " is" + }, + { + "id": 18147, + "logprob": -8.96875, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -1.953125, + "text": " Learning" + }, + { + "id": 32, + "logprob": -0.94189453, + "text": "?" + } + ], "seed": null, "tokens": [ { @@ -118,7 +144,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 17, - "prefill": [], + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -1.5, + "text": " is" + }, + { + "id": 18147, + "logprob": -8.984375, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -1.96875, + "text": " Learning" + }, + { + "id": 32, + "logprob": -0.93359375, + "text": "?" + } + ], "seed": null, "tokens": [ { @@ -232,7 +284,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 17, - "prefill": [], + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -1.5, + "text": " is" + }, + { + "id": 18147, + "logprob": -8.984375, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -1.96875, + "text": " Learning" + }, + { + "id": 32, + "logprob": -0.93359375, + "text": "?" + } + ], "seed": null, "tokens": [ { @@ -346,7 +424,33 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 17, - "prefill": [], + "prefill": [ + { + "id": 1276, + "logprob": null, + "text": "What" + }, + { + "id": 310, + "logprob": -1.5, + "text": " is" + }, + { + "id": 18147, + "logprob": -8.984375, + "text": " Deep" + }, + { + "id": 20727, + "logprob": -1.96875, + "text": " Learning" + }, + { + "id": 32, + "logprob": -0.93359375, + "text": "?" + } + ], "seed": null, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json index b83e31a5..c1cd24cd 100644 --- a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json +++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base.json @@ -3,7 +3,13 @@ "best_of_sequences": null, "finish_reason": "eos_token", "generated_tokens": 5, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": 0, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json index 06864988..9fd950a2 100644 --- a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json +++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_all_params.json @@ -3,7 +3,13 @@ "best_of_sequences": null, "finish_reason": "length", "generated_tokens": 10, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": 0, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json index 205e6656..c0834ae1 100644 --- a/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json +++ b/integration-tests/models/__snapshots__/test_mt0_base/test_mt0_base_load.json @@ -4,7 +4,13 @@ "best_of_sequences": null, "finish_reason": "eos_token", "generated_tokens": 6, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": null, "tokens": [ { @@ -52,7 +58,13 @@ "best_of_sequences": null, "finish_reason": "eos_token", "generated_tokens": 6, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": null, "tokens": [ { @@ -100,7 +112,13 @@ "best_of_sequences": null, "finish_reason": "eos_token", "generated_tokens": 6, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": null, "tokens": [ { @@ -148,7 +166,13 @@ "best_of_sequences": null, "finish_reason": "eos_token", "generated_tokens": 6, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": null, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded.json b/integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded.json index 6bdf9606..6090e2c9 100644 --- a/integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded.json +++ b/integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded.json @@ -3,7 +3,13 @@ "best_of_sequences": null, "finish_reason": "eos_token", "generated_tokens": 7, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": null, "tokens": [ { diff --git a/integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded_load.json b/integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded_load.json index 16b92294..3e9af12e 100644 --- a/integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded_load.json +++ b/integration-tests/models/__snapshots__/test_t5_sharded/test_t5_sharded_load.json @@ -4,7 +4,13 @@ "best_of_sequences": null, "finish_reason": "eos_token", "generated_tokens": 7, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": null, "tokens": [ { @@ -58,7 +64,13 @@ "best_of_sequences": null, "finish_reason": "eos_token", "generated_tokens": 7, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": null, "tokens": [ { @@ -112,7 +124,13 @@ "best_of_sequences": null, "finish_reason": "eos_token", "generated_tokens": 7, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": null, "tokens": [ { @@ -166,7 +184,13 @@ "best_of_sequences": null, "finish_reason": "eos_token", "generated_tokens": 7, - "prefill": [], + "prefill": [ + { + "id": 0, + "logprob": null, + "text": "" + } + ], "seed": null, "tokens": [ { diff --git a/launcher/src/main.rs b/launcher/src/main.rs index 610d6227..0530d521 100644 --- a/launcher/src/main.rs +++ b/launcher/src/main.rs @@ -80,7 +80,7 @@ fn get_config( }; let content = std::fs::read_to_string(filename)?; - let config: RawConfig = serde_json::from_str(&content)?; + let config: RawConfig = serde_json::from_str(&content).expect("?"); let config: Config = config.into(); Ok(config) @@ -171,6 +171,8 @@ struct RawConfig { head_dim: Option, vision_config: Option, is_encoder_decoder: Option, + #[serde(rename = "num_experts_per_tok")] + experts: Option, } #[derive(Deserialize)] @@ -194,6 +196,7 @@ struct Config { model_type: Option, vision_config: Option, is_encoder_decoder: bool, + experts: Option, } impl Config { @@ -202,7 +205,11 @@ impl Config { let num_kv_heads = self.num_kv_heads? as u64; let head_dim = self.head_dim? as u64; let hidden_size = self.hidden_size? as u64; - let intermediate_size = self.intermediate_size? as u64; + let intermediate_size = if let Some(experts) = self.experts { + (self.intermediate_size? * experts) as u64 + } else { + self.intermediate_size? as u64 + }; let num_layers = self.num_layers? as u64; let q_flops = 2 * num_heads * head_dim * hidden_size; @@ -245,6 +252,7 @@ impl From for Config { let model_type = other.model_type; let vision_config = other.vision_config; let is_encoder_decoder = other.is_encoder_decoder.unwrap_or(false); + let experts = other.experts; Config { max_position_embeddings, quantize, @@ -257,6 +265,7 @@ impl From for Config { num_kv_heads, intermediate_size, num_layers, + experts, } } }