From c5995652b065279d84ca8be9e53ef5ea2ee69862 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 7 Jun 2023 07:52:15 +0000 Subject: [PATCH] Fix regular flash --- .gitignore | 2 +- .../test_neox_sharded/test_neox_load.json | 378 +++++++++--------- .../text_generation_server/models/__init__.py | 2 +- 3 files changed, 191 insertions(+), 191 deletions(-) diff --git a/.gitignore b/.gitignore index 4f8f7b87..20c9baee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ .idea target router/tokenizer.json -.*__pycache__.* +*__pycache__* diff --git a/integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json b/integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json index 15637cdb..0b38e701 100644 --- a/integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json +++ b/integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json @@ -17,82 +17,82 @@ }, { "id": 310, - "logprob": -5.4140625, + "logprob": -5.4179688, "text": " is" }, { "id": 247, - "logprob": -2.1621094, + "logprob": -2.1542969, "text": " a" }, { "id": 1167, - "logprob": -5.453125, + "logprob": -5.359375, "text": " mem" }, { "id": 70, - "logprob": -0.005393982, + "logprob": -0.006038666, "text": "e" }, { "id": 13, - "logprob": -7.390625, + "logprob": -7.328125, "text": "," }, { "id": 285, - "logprob": -0.33691406, + "logprob": -0.3173828, "text": " and" }, { "id": 752, - "logprob": -2.2207031, + "logprob": -2.0625, "text": " what" }, { "id": 434, - "logprob": -5.5976562, + "logprob": -5.7734375, "text": "'s" }, { "id": 253, - "logprob": -0.7661133, + "logprob": -0.74072266, "text": " the" }, { "id": 2892, - "logprob": -6.515625, + "logprob": -6.5898438, "text": " history" }, { "id": 3212, - "logprob": -2.3085938, + "logprob": -2.2949219, "text": " behind" }, { "id": 436, - "logprob": -11.3203125, + "logprob": -11.40625, "text": " this" }, { "id": 3159, - "logprob": -2.1230469, + "logprob": -2.1113281, "text": " word" }, { "id": 32, - "logprob": -0.00856781, + "logprob": -0.008056641, "text": "?" }, { "id": 0, - "logprob": -2.4296875, + "logprob": -2.3300781, "text": "<|endoftext|>" }, { "id": 50281, - "logprob": -18.1875, + "logprob": -18.28125, "text": "<|assistant|>" } ], @@ -100,61 +100,224 @@ "tokens": [ { "id": 510, - "logprob": -0.64208984, + "logprob": -0.5878906, "special": false, "text": "The" }, { "id": 3159, - "logprob": -0.5839844, + "logprob": -0.5498047, "special": false, "text": " word" }, { "id": 346, - "logprob": -0.04989624, + "logprob": -0.04815674, "special": false, "text": " \"" }, { "id": 6441, - "logprob": -0.0021305084, + "logprob": -0.002313614, "special": false, "text": "mem" }, { "id": 70, - "logprob": -1.180172e-05, + "logprob": -1.2636185e-05, "special": false, "text": "e" }, { "id": 3, - "logprob": -0.00092983246, + "logprob": -0.0010147095, "special": false, "text": "\"" }, { "id": 369, - "logprob": -0.08496094, + "logprob": -0.0859375, "special": false, "text": " was" }, { "id": 806, - "logprob": -0.13256836, + "logprob": -0.12609863, "special": false, "text": " first" }, { "id": 908, - "logprob": -0.017059326, + "logprob": -0.016601562, "special": false, "text": " used" }, { "id": 275, - "logprob": -0.4921875, + "logprob": -0.38256836, + "special": false, + "text": " in" + } + ] + }, + "generated_text": "The word \"meme\" was first used in" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50278, + "logprob": null, + "text": "<|prompter|>" + }, + { + "id": 1276, + "logprob": -8.0234375, + "text": "What" + }, + { + "id": 310, + "logprob": -5.421875, + "text": " is" + }, + { + "id": 247, + "logprob": -2.1640625, + "text": " a" + }, + { + "id": 1167, + "logprob": -5.40625, + "text": " mem" + }, + { + "id": 70, + "logprob": -0.005420685, + "text": "e" + }, + { + "id": 13, + "logprob": -7.2226562, + "text": "," + }, + { + "id": 285, + "logprob": -0.26879883, + "text": " and" + }, + { + "id": 752, + "logprob": -2.1992188, + "text": " what" + }, + { + "id": 434, + "logprob": -5.46875, + "text": "'s" + }, + { + "id": 253, + "logprob": -0.8017578, + "text": " the" + }, + { + "id": 2892, + "logprob": -6.6796875, + "text": " history" + }, + { + "id": 3212, + "logprob": -2.1972656, + "text": " behind" + }, + { + "id": 436, + "logprob": -11.4453125, + "text": " this" + }, + { + "id": 3159, + "logprob": -2.1933594, + "text": " word" + }, + { + "id": 32, + "logprob": -0.007858276, + "text": "?" + }, + { + "id": 0, + "logprob": -2.328125, + "text": "<|endoftext|>" + }, + { + "id": 50281, + "logprob": -18.21875, + "text": "<|assistant|>" + } + ], + "seed": null, + "tokens": [ + { + "id": 510, + "logprob": -0.6201172, + "special": false, + "text": "The" + }, + { + "id": 3159, + "logprob": -0.546875, + "special": false, + "text": " word" + }, + { + "id": 346, + "logprob": -0.051879883, + "special": false, + "text": " \"" + }, + { + "id": 6441, + "logprob": -0.0020179749, + "special": false, + "text": "mem" + }, + { + "id": 70, + "logprob": -9.059906e-06, + "special": false, + "text": "e" + }, + { + "id": 3, + "logprob": -0.00096797943, + "special": false, + "text": "\"" + }, + { + "id": 369, + "logprob": -0.07940674, + "special": false, + "text": " was" + }, + { + "id": 806, + "logprob": -0.12182617, + "special": false, + "text": " first" + }, + { + "id": 908, + "logprob": -0.017227173, + "special": false, + "text": " used" + }, + { + "id": 275, + "logprob": -0.44482422, "special": false, "text": " in" } @@ -487,168 +650,5 @@ ] }, "generated_text": "The word \"meme\" was first used in" - }, - { - "details": { - "best_of_sequences": null, - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [ - { - "id": 50278, - "logprob": null, - "text": "<|prompter|>" - }, - { - "id": 1276, - "logprob": -8.0234375, - "text": "What" - }, - { - "id": 310, - "logprob": -5.421875, - "text": " is" - }, - { - "id": 247, - "logprob": -2.1640625, - "text": " a" - }, - { - "id": 1167, - "logprob": -5.40625, - "text": " mem" - }, - { - "id": 70, - "logprob": -0.005420685, - "text": "e" - }, - { - "id": 13, - "logprob": -7.2226562, - "text": "," - }, - { - "id": 285, - "logprob": -0.26879883, - "text": " and" - }, - { - "id": 752, - "logprob": -2.1992188, - "text": " what" - }, - { - "id": 434, - "logprob": -5.46875, - "text": "'s" - }, - { - "id": 253, - "logprob": -0.8017578, - "text": " the" - }, - { - "id": 2892, - "logprob": -6.6796875, - "text": " history" - }, - { - "id": 3212, - "logprob": -2.1972656, - "text": " behind" - }, - { - "id": 436, - "logprob": -11.4453125, - "text": " this" - }, - { - "id": 3159, - "logprob": -2.1933594, - "text": " word" - }, - { - "id": 32, - "logprob": -0.007858276, - "text": "?" - }, - { - "id": 0, - "logprob": -2.328125, - "text": "<|endoftext|>" - }, - { - "id": 50281, - "logprob": -18.21875, - "text": "<|assistant|>" - } - ], - "seed": null, - "tokens": [ - { - "id": 510, - "logprob": -0.6201172, - "special": false, - "text": "The" - }, - { - "id": 3159, - "logprob": -0.546875, - "special": false, - "text": " word" - }, - { - "id": 346, - "logprob": -0.051879883, - "special": false, - "text": " \"" - }, - { - "id": 6441, - "logprob": -0.0020179749, - "special": false, - "text": "mem" - }, - { - "id": 70, - "logprob": -9.059906e-06, - "special": false, - "text": "e" - }, - { - "id": 3, - "logprob": -0.00096797943, - "special": false, - "text": "\"" - }, - { - "id": 369, - "logprob": -0.07940674, - "special": false, - "text": " was" - }, - { - "id": 806, - "logprob": -0.12182617, - "special": false, - "text": " first" - }, - { - "id": 908, - "logprob": -0.017227173, - "special": false, - "text": " used" - }, - { - "id": 275, - "logprob": -0.44482422, - "special": false, - "text": " in" - } - ] - }, - "generated_text": "The word \"meme\" was first used in" } ] diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py index 6a0f32a1..aa3eca33 100644 --- a/server/text_generation_server/models/__init__.py +++ b/server/text_generation_server/models/__init__.py @@ -19,7 +19,7 @@ from text_generation_server.models.t5 import T5Sharded from text_generation_server.models.gpt_neox import GPTNeoxSharded try: - if torch.cuda.is_available() and not os.getenv("USE_FLASH_ATTENTION").lower() == "false": + if torch.cuda.is_available() and not os.getenv("USE_FLASH_ATTENTION", "").lower() == "false": major, minor = torch.cuda.get_device_capability() is_sm75 = major == 7 and minor == 5 is_sm8x = major == 8 and minor >= 0