Fix regular flash

This commit is contained in:
Ubuntu 2023-06-07 07:52:15 +00:00
parent 877d4d4aeb
commit c5995652b0
3 changed files with 191 additions and 191 deletions

2
.gitignore vendored
View File

@ -1,4 +1,4 @@
.idea .idea
target target
router/tokenizer.json router/tokenizer.json
.*__pycache__.* *__pycache__*

View File

@ -17,82 +17,82 @@
}, },
{ {
"id": 310, "id": 310,
"logprob": -5.4140625, "logprob": -5.4179688,
"text": " is" "text": " is"
}, },
{ {
"id": 247, "id": 247,
"logprob": -2.1621094, "logprob": -2.1542969,
"text": " a" "text": " a"
}, },
{ {
"id": 1167, "id": 1167,
"logprob": -5.453125, "logprob": -5.359375,
"text": " mem" "text": " mem"
}, },
{ {
"id": 70, "id": 70,
"logprob": -0.005393982, "logprob": -0.006038666,
"text": "e" "text": "e"
}, },
{ {
"id": 13, "id": 13,
"logprob": -7.390625, "logprob": -7.328125,
"text": "," "text": ","
}, },
{ {
"id": 285, "id": 285,
"logprob": -0.33691406, "logprob": -0.3173828,
"text": " and" "text": " and"
}, },
{ {
"id": 752, "id": 752,
"logprob": -2.2207031, "logprob": -2.0625,
"text": " what" "text": " what"
}, },
{ {
"id": 434, "id": 434,
"logprob": -5.5976562, "logprob": -5.7734375,
"text": "'s" "text": "'s"
}, },
{ {
"id": 253, "id": 253,
"logprob": -0.7661133, "logprob": -0.74072266,
"text": " the" "text": " the"
}, },
{ {
"id": 2892, "id": 2892,
"logprob": -6.515625, "logprob": -6.5898438,
"text": " history" "text": " history"
}, },
{ {
"id": 3212, "id": 3212,
"logprob": -2.3085938, "logprob": -2.2949219,
"text": " behind" "text": " behind"
}, },
{ {
"id": 436, "id": 436,
"logprob": -11.3203125, "logprob": -11.40625,
"text": " this" "text": " this"
}, },
{ {
"id": 3159, "id": 3159,
"logprob": -2.1230469, "logprob": -2.1113281,
"text": " word" "text": " word"
}, },
{ {
"id": 32, "id": 32,
"logprob": -0.00856781, "logprob": -0.008056641,
"text": "?" "text": "?"
}, },
{ {
"id": 0, "id": 0,
"logprob": -2.4296875, "logprob": -2.3300781,
"text": "<|endoftext|>" "text": "<|endoftext|>"
}, },
{ {
"id": 50281, "id": 50281,
"logprob": -18.1875, "logprob": -18.28125,
"text": "<|assistant|>" "text": "<|assistant|>"
} }
], ],
@ -100,61 +100,224 @@
"tokens": [ "tokens": [
{ {
"id": 510, "id": 510,
"logprob": -0.64208984, "logprob": -0.5878906,
"special": false, "special": false,
"text": "The" "text": "The"
}, },
{ {
"id": 3159, "id": 3159,
"logprob": -0.5839844, "logprob": -0.5498047,
"special": false, "special": false,
"text": " word" "text": " word"
}, },
{ {
"id": 346, "id": 346,
"logprob": -0.04989624, "logprob": -0.04815674,
"special": false, "special": false,
"text": " \"" "text": " \""
}, },
{ {
"id": 6441, "id": 6441,
"logprob": -0.0021305084, "logprob": -0.002313614,
"special": false, "special": false,
"text": "mem" "text": "mem"
}, },
{ {
"id": 70, "id": 70,
"logprob": -1.180172e-05, "logprob": -1.2636185e-05,
"special": false, "special": false,
"text": "e" "text": "e"
}, },
{ {
"id": 3, "id": 3,
"logprob": -0.00092983246, "logprob": -0.0010147095,
"special": false, "special": false,
"text": "\"" "text": "\""
}, },
{ {
"id": 369, "id": 369,
"logprob": -0.08496094, "logprob": -0.0859375,
"special": false, "special": false,
"text": " was" "text": " was"
}, },
{ {
"id": 806, "id": 806,
"logprob": -0.13256836, "logprob": -0.12609863,
"special": false, "special": false,
"text": " first" "text": " first"
}, },
{ {
"id": 908, "id": 908,
"logprob": -0.017059326, "logprob": -0.016601562,
"special": false, "special": false,
"text": " used" "text": " used"
}, },
{ {
"id": 275, "id": 275,
"logprob": -0.4921875, "logprob": -0.38256836,
"special": false,
"text": " in"
}
]
},
"generated_text": "The word \"meme\" was first used in"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 50278,
"logprob": null,
"text": "<|prompter|>"
},
{
"id": 1276,
"logprob": -8.0234375,
"text": "What"
},
{
"id": 310,
"logprob": -5.421875,
"text": " is"
},
{
"id": 247,
"logprob": -2.1640625,
"text": " a"
},
{
"id": 1167,
"logprob": -5.40625,
"text": " mem"
},
{
"id": 70,
"logprob": -0.005420685,
"text": "e"
},
{
"id": 13,
"logprob": -7.2226562,
"text": ","
},
{
"id": 285,
"logprob": -0.26879883,
"text": " and"
},
{
"id": 752,
"logprob": -2.1992188,
"text": " what"
},
{
"id": 434,
"logprob": -5.46875,
"text": "'s"
},
{
"id": 253,
"logprob": -0.8017578,
"text": " the"
},
{
"id": 2892,
"logprob": -6.6796875,
"text": " history"
},
{
"id": 3212,
"logprob": -2.1972656,
"text": " behind"
},
{
"id": 436,
"logprob": -11.4453125,
"text": " this"
},
{
"id": 3159,
"logprob": -2.1933594,
"text": " word"
},
{
"id": 32,
"logprob": -0.007858276,
"text": "?"
},
{
"id": 0,
"logprob": -2.328125,
"text": "<|endoftext|>"
},
{
"id": 50281,
"logprob": -18.21875,
"text": "<|assistant|>"
}
],
"seed": null,
"tokens": [
{
"id": 510,
"logprob": -0.6201172,
"special": false,
"text": "The"
},
{
"id": 3159,
"logprob": -0.546875,
"special": false,
"text": " word"
},
{
"id": 346,
"logprob": -0.051879883,
"special": false,
"text": " \""
},
{
"id": 6441,
"logprob": -0.0020179749,
"special": false,
"text": "mem"
},
{
"id": 70,
"logprob": -9.059906e-06,
"special": false,
"text": "e"
},
{
"id": 3,
"logprob": -0.00096797943,
"special": false,
"text": "\""
},
{
"id": 369,
"logprob": -0.07940674,
"special": false,
"text": " was"
},
{
"id": 806,
"logprob": -0.12182617,
"special": false,
"text": " first"
},
{
"id": 908,
"logprob": -0.017227173,
"special": false,
"text": " used"
},
{
"id": 275,
"logprob": -0.44482422,
"special": false, "special": false,
"text": " in" "text": " in"
} }
@ -487,168 +650,5 @@
] ]
}, },
"generated_text": "The word \"meme\" was first used in" "generated_text": "The word \"meme\" was first used in"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "length",
"generated_tokens": 10,
"prefill": [
{
"id": 50278,
"logprob": null,
"text": "<|prompter|>"
},
{
"id": 1276,
"logprob": -8.0234375,
"text": "What"
},
{
"id": 310,
"logprob": -5.421875,
"text": " is"
},
{
"id": 247,
"logprob": -2.1640625,
"text": " a"
},
{
"id": 1167,
"logprob": -5.40625,
"text": " mem"
},
{
"id": 70,
"logprob": -0.005420685,
"text": "e"
},
{
"id": 13,
"logprob": -7.2226562,
"text": ","
},
{
"id": 285,
"logprob": -0.26879883,
"text": " and"
},
{
"id": 752,
"logprob": -2.1992188,
"text": " what"
},
{
"id": 434,
"logprob": -5.46875,
"text": "'s"
},
{
"id": 253,
"logprob": -0.8017578,
"text": " the"
},
{
"id": 2892,
"logprob": -6.6796875,
"text": " history"
},
{
"id": 3212,
"logprob": -2.1972656,
"text": " behind"
},
{
"id": 436,
"logprob": -11.4453125,
"text": " this"
},
{
"id": 3159,
"logprob": -2.1933594,
"text": " word"
},
{
"id": 32,
"logprob": -0.007858276,
"text": "?"
},
{
"id": 0,
"logprob": -2.328125,
"text": "<|endoftext|>"
},
{
"id": 50281,
"logprob": -18.21875,
"text": "<|assistant|>"
}
],
"seed": null,
"tokens": [
{
"id": 510,
"logprob": -0.6201172,
"special": false,
"text": "The"
},
{
"id": 3159,
"logprob": -0.546875,
"special": false,
"text": " word"
},
{
"id": 346,
"logprob": -0.051879883,
"special": false,
"text": " \""
},
{
"id": 6441,
"logprob": -0.0020179749,
"special": false,
"text": "mem"
},
{
"id": 70,
"logprob": -9.059906e-06,
"special": false,
"text": "e"
},
{
"id": 3,
"logprob": -0.00096797943,
"special": false,
"text": "\""
},
{
"id": 369,
"logprob": -0.07940674,
"special": false,
"text": " was"
},
{
"id": 806,
"logprob": -0.12182617,
"special": false,
"text": " first"
},
{
"id": 908,
"logprob": -0.017227173,
"special": false,
"text": " used"
},
{
"id": 275,
"logprob": -0.44482422,
"special": false,
"text": " in"
}
]
},
"generated_text": "The word \"meme\" was first used in"
} }
] ]

View File

@ -19,7 +19,7 @@ from text_generation_server.models.t5 import T5Sharded
from text_generation_server.models.gpt_neox import GPTNeoxSharded from text_generation_server.models.gpt_neox import GPTNeoxSharded
try: try:
if torch.cuda.is_available() and not os.getenv("USE_FLASH_ATTENTION").lower() == "false": if torch.cuda.is_available() and not os.getenv("USE_FLASH_ATTENTION", "").lower() == "false":
major, minor = torch.cuda.get_device_capability() major, minor = torch.cuda.get_device_capability()
is_sm75 = major == 7 and minor == 5 is_sm75 = major == 7 and minor == 5
is_sm8x = major == 8 and minor >= 0 is_sm8x = major == 8 and minor >= 0