Fix regular flash

This commit is contained in:
Ubuntu 2023-06-07 07:52:15 +00:00
parent 877d4d4aeb
commit c5995652b0
3 changed files with 191 additions and 191 deletions

2
.gitignore vendored
View File

@ -1,4 +1,4 @@
.idea
target
router/tokenizer.json
.*__pycache__.*
*__pycache__*

View File

@ -17,82 +17,82 @@
},
{
"id": 310,
"logprob": -5.4140625,
"logprob": -5.4179688,
"text": " is"
},
{
"id": 247,
"logprob": -2.1621094,
"logprob": -2.1542969,
"text": " a"
},
{
"id": 1167,
"logprob": -5.453125,
"logprob": -5.359375,
"text": " mem"
},
{
"id": 70,
"logprob": -0.005393982,
"logprob": -0.006038666,
"text": "e"
},
{
"id": 13,
"logprob": -7.390625,
"logprob": -7.328125,
"text": ","
},
{
"id": 285,
"logprob": -0.33691406,
"logprob": -0.3173828,
"text": " and"
},
{
"id": 752,
"logprob": -2.2207031,
"logprob": -2.0625,
"text": " what"
},
{
"id": 434,
"logprob": -5.5976562,
"logprob": -5.7734375,
"text": "'s"
},
{
"id": 253,
"logprob": -0.7661133,
"logprob": -0.74072266,
"text": " the"
},
{
"id": 2892,
"logprob": -6.515625,
"logprob": -6.5898438,
"text": " history"
},
{
"id": 3212,
"logprob": -2.3085938,
"logprob": -2.2949219,
"text": " behind"
},
{
"id": 436,
"logprob": -11.3203125,
"logprob": -11.40625,
"text": " this"
},
{
"id": 3159,
"logprob": -2.1230469,
"logprob": -2.1113281,
"text": " word"
},
{
"id": 32,
"logprob": -0.00856781,
"logprob": -0.008056641,
"text": "?"
},
{
"id": 0,
"logprob": -2.4296875,
"logprob": -2.3300781,
"text": "<|endoftext|>"
},
{
"id": 50281,
"logprob": -18.1875,
"logprob": -18.28125,
"text": "<|assistant|>"
}
],
@ -100,61 +100,224 @@
"tokens": [
{
"id": 510,
"logprob": -0.64208984,
"logprob": -0.5878906,
"special": false,
"text": "The"
},
{
"id": 3159,
"logprob": -0.5839844,
"logprob": -0.5498047,
"special": false,
"text": " word"
},
{
"id": 346,
"logprob": -0.04989624,
"logprob": -0.04815674,
"special": false,
"text": " \""
},
{
"id": 6441,
"logprob": -0.0021305084,
"logprob": -0.002313614,
"special": false,
"text": "mem"
},
{
"id": 70,
"logprob": -1.180172e-05,
"logprob": -1.2636185e-05,
"special": false,
"text": "e"
},
{
"id": 3,
"logprob": -0.00092983246,
"logprob": -0.0010147095,
"special": false,
"text": "\""
},
{
"id": 369,
"logprob": -0.08496094,
"logprob": -0.0859375,
"special": false,
"text": " was"
},
{
"id": 806,
"logprob": -0.13256836,
"logprob": -0.12609863,
"special": false,
"text": " first"
},
{
"id": 908,
"logprob": -0.017059326,
"logprob": -0.016601562,
"special": false,
"text": " used"
},
{
"id": 275,
"logprob": -0.4921875,
"logprob": -0.38256836,
"special": false,
"text": " in"
}
]
},
"generated_text": "The word \"meme\" was first used in"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 50278,
"logprob": null,
"text": "<|prompter|>"
},
{
"id": 1276,
"logprob": -8.0234375,
"text": "What"
},
{
"id": 310,
"logprob": -5.421875,
"text": " is"
},
{
"id": 247,
"logprob": -2.1640625,
"text": " a"
},
{
"id": 1167,
"logprob": -5.40625,
"text": " mem"
},
{
"id": 70,
"logprob": -0.005420685,
"text": "e"
},
{
"id": 13,
"logprob": -7.2226562,
"text": ","
},
{
"id": 285,
"logprob": -0.26879883,
"text": " and"
},
{
"id": 752,
"logprob": -2.1992188,
"text": " what"
},
{
"id": 434,
"logprob": -5.46875,
"text": "'s"
},
{
"id": 253,
"logprob": -0.8017578,
"text": " the"
},
{
"id": 2892,
"logprob": -6.6796875,
"text": " history"
},
{
"id": 3212,
"logprob": -2.1972656,
"text": " behind"
},
{
"id": 436,
"logprob": -11.4453125,
"text": " this"
},
{
"id": 3159,
"logprob": -2.1933594,
"text": " word"
},
{
"id": 32,
"logprob": -0.007858276,
"text": "?"
},
{
"id": 0,
"logprob": -2.328125,
"text": "<|endoftext|>"
},
{
"id": 50281,
"logprob": -18.21875,
"text": "<|assistant|>"
}
],
"seed": null,
"tokens": [
{
"id": 510,
"logprob": -0.6201172,
"special": false,
"text": "The"
},
{
"id": 3159,
"logprob": -0.546875,
"special": false,
"text": " word"
},
{
"id": 346,
"logprob": -0.051879883,
"special": false,
"text": " \""
},
{
"id": 6441,
"logprob": -0.0020179749,
"special": false,
"text": "mem"
},
{
"id": 70,
"logprob": -9.059906e-06,
"special": false,
"text": "e"
},
{
"id": 3,
"logprob": -0.00096797943,
"special": false,
"text": "\""
},
{
"id": 369,
"logprob": -0.07940674,
"special": false,
"text": " was"
},
{
"id": 806,
"logprob": -0.12182617,
"special": false,
"text": " first"
},
{
"id": 908,
"logprob": -0.017227173,
"special": false,
"text": " used"
},
{
"id": 275,
"logprob": -0.44482422,
"special": false,
"text": " in"
}
@ -487,168 +650,5 @@
]
},
"generated_text": "The word \"meme\" was first used in"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 50278,
"logprob": null,
"text": "<|prompter|>"
},
{
"id": 1276,
"logprob": -8.0234375,
"text": "What"
},
{
"id": 310,
"logprob": -5.421875,
"text": " is"
},
{
"id": 247,
"logprob": -2.1640625,
"text": " a"
},
{
"id": 1167,
"logprob": -5.40625,
"text": " mem"
},
{
"id": 70,
"logprob": -0.005420685,
"text": "e"
},
{
"id": 13,
"logprob": -7.2226562,
"text": ","
},
{
"id": 285,
"logprob": -0.26879883,
"text": " and"
},
{
"id": 752,
"logprob": -2.1992188,
"text": " what"
},
{
"id": 434,
"logprob": -5.46875,
"text": "'s"
},
{
"id": 253,
"logprob": -0.8017578,
"text": " the"
},
{
"id": 2892,
"logprob": -6.6796875,
"text": " history"
},
{
"id": 3212,
"logprob": -2.1972656,
"text": " behind"
},
{
"id": 436,
"logprob": -11.4453125,
"text": " this"
},
{
"id": 3159,
"logprob": -2.1933594,
"text": " word"
},
{
"id": 32,
"logprob": -0.007858276,
"text": "?"
},
{
"id": 0,
"logprob": -2.328125,
"text": "<|endoftext|>"
},
{
"id": 50281,
"logprob": -18.21875,
"text": "<|assistant|>"
}
],
"seed": null,
"tokens": [
{
"id": 510,
"logprob": -0.6201172,
"special": false,
"text": "The"
},
{
"id": 3159,
"logprob": -0.546875,
"special": false,
"text": " word"
},
{
"id": 346,
"logprob": -0.051879883,
"special": false,
"text": " \""
},
{
"id": 6441,
"logprob": -0.0020179749,
"special": false,
"text": "mem"
},
{
"id": 70,
"logprob": -9.059906e-06,
"special": false,
"text": "e"
},
{
"id": 3,
"logprob": -0.00096797943,
"special": false,
"text": "\""
},
{
"id": 369,
"logprob": -0.07940674,
"special": false,
"text": " was"
},
{
"id": 806,
"logprob": -0.12182617,
"special": false,
"text": " first"
},
{
"id": 908,
"logprob": -0.017227173,
"special": false,
"text": " used"
},
{
"id": 275,
"logprob": -0.44482422,
"special": false,
"text": " in"
}
]
},
"generated_text": "The word \"meme\" was first used in"
}
]

View File

@ -19,7 +19,7 @@ from text_generation_server.models.t5 import T5Sharded
from text_generation_server.models.gpt_neox import GPTNeoxSharded
try:
if torch.cuda.is_available() and not os.getenv("USE_FLASH_ATTENTION").lower() == "false":
if torch.cuda.is_available() and not os.getenv("USE_FLASH_ATTENTION", "").lower() == "false":
major, minor = torch.cuda.get_device_capability()
is_sm75 = major == 7 and minor == 5
is_sm8x = major == 8 and minor >= 0