From c5995652b065279d84ca8be9e53ef5ea2ee69862 Mon Sep 17 00:00:00 2001
From: Ubuntu <ubuntu@ip-172-31-41-161.ec2.internal>
Date: Wed, 7 Jun 2023 07:52:15 +0000
Subject: [PATCH] Fix flash attention check when USE_FLASH_ATTENTION is unset

---
 .gitignore                                    |   2 +-
 .../test_neox_sharded/test_neox_load.json     | 378 +++++++++---------
 .../text_generation_server/models/__init__.py |   2 +-
 3 files changed, 191 insertions(+), 191 deletions(-)

diff --git a/.gitignore b/.gitignore
index 4f8f7b87..20c9baee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 .idea
 target
 router/tokenizer.json
-.*__pycache__.*
+*__pycache__*
diff --git a/integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json b/integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json
index 15637cdb..0b38e701 100644
--- a/integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json
+++ b/integration-tests/models/__snapshots__/test_neox_sharded/test_neox_load.json
@@ -17,82 +17,82 @@
         },
         {
           "id": 310,
-          "logprob": -5.4140625,
+          "logprob": -5.4179688,
           "text": " is"
         },
         {
           "id": 247,
-          "logprob": -2.1621094,
+          "logprob": -2.1542969,
           "text": " a"
         },
         {
           "id": 1167,
-          "logprob": -5.453125,
+          "logprob": -5.359375,
           "text": " mem"
         },
         {
           "id": 70,
-          "logprob": -0.005393982,
+          "logprob": -0.006038666,
           "text": "e"
         },
         {
           "id": 13,
-          "logprob": -7.390625,
+          "logprob": -7.328125,
           "text": ","
         },
         {
           "id": 285,
-          "logprob": -0.33691406,
+          "logprob": -0.3173828,
           "text": " and"
         },
         {
           "id": 752,
-          "logprob": -2.2207031,
+          "logprob": -2.0625,
           "text": " what"
         },
         {
           "id": 434,
-          "logprob": -5.5976562,
+          "logprob": -5.7734375,
           "text": "'s"
         },
         {
           "id": 253,
-          "logprob": -0.7661133,
+          "logprob": -0.74072266,
           "text": " the"
         },
         {
           "id": 2892,
-          "logprob": -6.515625,
+          "logprob": -6.5898438,
           "text": " history"
         },
         {
           "id": 3212,
-          "logprob": -2.3085938,
+          "logprob": -2.2949219,
           "text": " behind"
         },
         {
           "id": 436,
-          "logprob": -11.3203125,
+          "logprob": -11.40625,
           "text": " this"
         },
         {
           "id": 3159,
-          "logprob": -2.1230469,
+          "logprob": -2.1113281,
           "text": " word"
         },
         {
           "id": 32,
-          "logprob": -0.00856781,
+          "logprob": -0.008056641,
           "text": "?"
         },
         {
           "id": 0,
-          "logprob": -2.4296875,
+          "logprob": -2.3300781,
           "text": "<|endoftext|>"
         },
         {
           "id": 50281,
-          "logprob": -18.1875,
+          "logprob": -18.28125,
           "text": "<|assistant|>"
         }
       ],
@@ -100,61 +100,224 @@
       "tokens": [
         {
           "id": 510,
-          "logprob": -0.64208984,
+          "logprob": -0.5878906,
           "special": false,
           "text": "The"
         },
         {
           "id": 3159,
-          "logprob": -0.5839844,
+          "logprob": -0.5498047,
           "special": false,
           "text": " word"
         },
         {
           "id": 346,
-          "logprob": -0.04989624,
+          "logprob": -0.04815674,
           "special": false,
           "text": " \""
         },
         {
           "id": 6441,
-          "logprob": -0.0021305084,
+          "logprob": -0.002313614,
           "special": false,
           "text": "mem"
         },
         {
           "id": 70,
-          "logprob": -1.180172e-05,
+          "logprob": -1.2636185e-05,
           "special": false,
           "text": "e"
         },
         {
           "id": 3,
-          "logprob": -0.00092983246,
+          "logprob": -0.0010147095,
           "special": false,
           "text": "\""
         },
         {
           "id": 369,
-          "logprob": -0.08496094,
+          "logprob": -0.0859375,
           "special": false,
           "text": " was"
         },
         {
           "id": 806,
-          "logprob": -0.13256836,
+          "logprob": -0.12609863,
           "special": false,
           "text": " first"
         },
         {
           "id": 908,
-          "logprob": -0.017059326,
+          "logprob": -0.016601562,
           "special": false,
           "text": " used"
         },
         {
           "id": 275,
-          "logprob": -0.4921875,
+          "logprob": -0.38256836,
+          "special": false,
+          "text": " in"
+        }
+      ]
+    },
+    "generated_text": "The word \"meme\" was first used in"
+  },
+  {
+    "details": {
+      "best_of_sequences": null,
+      "finish_reason": "length",
+      "generated_tokens": 10,
+      "prefill": [
+        {
+          "id": 50278,
+          "logprob": null,
+          "text": "<|prompter|>"
+        },
+        {
+          "id": 1276,
+          "logprob": -8.0234375,
+          "text": "What"
+        },
+        {
+          "id": 310,
+          "logprob": -5.421875,
+          "text": " is"
+        },
+        {
+          "id": 247,
+          "logprob": -2.1640625,
+          "text": " a"
+        },
+        {
+          "id": 1167,
+          "logprob": -5.40625,
+          "text": " mem"
+        },
+        {
+          "id": 70,
+          "logprob": -0.005420685,
+          "text": "e"
+        },
+        {
+          "id": 13,
+          "logprob": -7.2226562,
+          "text": ","
+        },
+        {
+          "id": 285,
+          "logprob": -0.26879883,
+          "text": " and"
+        },
+        {
+          "id": 752,
+          "logprob": -2.1992188,
+          "text": " what"
+        },
+        {
+          "id": 434,
+          "logprob": -5.46875,
+          "text": "'s"
+        },
+        {
+          "id": 253,
+          "logprob": -0.8017578,
+          "text": " the"
+        },
+        {
+          "id": 2892,
+          "logprob": -6.6796875,
+          "text": " history"
+        },
+        {
+          "id": 3212,
+          "logprob": -2.1972656,
+          "text": " behind"
+        },
+        {
+          "id": 436,
+          "logprob": -11.4453125,
+          "text": " this"
+        },
+        {
+          "id": 3159,
+          "logprob": -2.1933594,
+          "text": " word"
+        },
+        {
+          "id": 32,
+          "logprob": -0.007858276,
+          "text": "?"
+        },
+        {
+          "id": 0,
+          "logprob": -2.328125,
+          "text": "<|endoftext|>"
+        },
+        {
+          "id": 50281,
+          "logprob": -18.21875,
+          "text": "<|assistant|>"
+        }
+      ],
+      "seed": null,
+      "tokens": [
+        {
+          "id": 510,
+          "logprob": -0.6201172,
+          "special": false,
+          "text": "The"
+        },
+        {
+          "id": 3159,
+          "logprob": -0.546875,
+          "special": false,
+          "text": " word"
+        },
+        {
+          "id": 346,
+          "logprob": -0.051879883,
+          "special": false,
+          "text": " \""
+        },
+        {
+          "id": 6441,
+          "logprob": -0.0020179749,
+          "special": false,
+          "text": "mem"
+        },
+        {
+          "id": 70,
+          "logprob": -9.059906e-06,
+          "special": false,
+          "text": "e"
+        },
+        {
+          "id": 3,
+          "logprob": -0.00096797943,
+          "special": false,
+          "text": "\""
+        },
+        {
+          "id": 369,
+          "logprob": -0.07940674,
+          "special": false,
+          "text": " was"
+        },
+        {
+          "id": 806,
+          "logprob": -0.12182617,
+          "special": false,
+          "text": " first"
+        },
+        {
+          "id": 908,
+          "logprob": -0.017227173,
+          "special": false,
+          "text": " used"
+        },
+        {
+          "id": 275,
+          "logprob": -0.44482422,
           "special": false,
           "text": " in"
         }
@@ -487,168 +650,5 @@
       ]
     },
     "generated_text": "The word \"meme\" was first used in"
-  },
-  {
-    "details": {
-      "best_of_sequences": null,
-      "finish_reason": "length",
-      "generated_tokens": 10,
-      "prefill": [
-        {
-          "id": 50278,
-          "logprob": null,
-          "text": "<|prompter|>"
-        },
-        {
-          "id": 1276,
-          "logprob": -8.0234375,
-          "text": "What"
-        },
-        {
-          "id": 310,
-          "logprob": -5.421875,
-          "text": " is"
-        },
-        {
-          "id": 247,
-          "logprob": -2.1640625,
-          "text": " a"
-        },
-        {
-          "id": 1167,
-          "logprob": -5.40625,
-          "text": " mem"
-        },
-        {
-          "id": 70,
-          "logprob": -0.005420685,
-          "text": "e"
-        },
-        {
-          "id": 13,
-          "logprob": -7.2226562,
-          "text": ","
-        },
-        {
-          "id": 285,
-          "logprob": -0.26879883,
-          "text": " and"
-        },
-        {
-          "id": 752,
-          "logprob": -2.1992188,
-          "text": " what"
-        },
-        {
-          "id": 434,
-          "logprob": -5.46875,
-          "text": "'s"
-        },
-        {
-          "id": 253,
-          "logprob": -0.8017578,
-          "text": " the"
-        },
-        {
-          "id": 2892,
-          "logprob": -6.6796875,
-          "text": " history"
-        },
-        {
-          "id": 3212,
-          "logprob": -2.1972656,
-          "text": " behind"
-        },
-        {
-          "id": 436,
-          "logprob": -11.4453125,
-          "text": " this"
-        },
-        {
-          "id": 3159,
-          "logprob": -2.1933594,
-          "text": " word"
-        },
-        {
-          "id": 32,
-          "logprob": -0.007858276,
-          "text": "?"
-        },
-        {
-          "id": 0,
-          "logprob": -2.328125,
-          "text": "<|endoftext|>"
-        },
-        {
-          "id": 50281,
-          "logprob": -18.21875,
-          "text": "<|assistant|>"
-        }
-      ],
-      "seed": null,
-      "tokens": [
-        {
-          "id": 510,
-          "logprob": -0.6201172,
-          "special": false,
-          "text": "The"
-        },
-        {
-          "id": 3159,
-          "logprob": -0.546875,
-          "special": false,
-          "text": " word"
-        },
-        {
-          "id": 346,
-          "logprob": -0.051879883,
-          "special": false,
-          "text": " \""
-        },
-        {
-          "id": 6441,
-          "logprob": -0.0020179749,
-          "special": false,
-          "text": "mem"
-        },
-        {
-          "id": 70,
-          "logprob": -9.059906e-06,
-          "special": false,
-          "text": "e"
-        },
-        {
-          "id": 3,
-          "logprob": -0.00096797943,
-          "special": false,
-          "text": "\""
-        },
-        {
-          "id": 369,
-          "logprob": -0.07940674,
-          "special": false,
-          "text": " was"
-        },
-        {
-          "id": 806,
-          "logprob": -0.12182617,
-          "special": false,
-          "text": " first"
-        },
-        {
-          "id": 908,
-          "logprob": -0.017227173,
-          "special": false,
-          "text": " used"
-        },
-        {
-          "id": 275,
-          "logprob": -0.44482422,
-          "special": false,
-          "text": " in"
-        }
-      ]
-    },
-    "generated_text": "The word \"meme\" was first used in"
   }
 ]
diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py
index 6a0f32a1..aa3eca33 100644
--- a/server/text_generation_server/models/__init__.py
+++ b/server/text_generation_server/models/__init__.py
@@ -19,7 +19,7 @@ from text_generation_server.models.t5 import T5Sharded
 from text_generation_server.models.gpt_neox import GPTNeoxSharded
 
 try:
-    if torch.cuda.is_available() and not os.getenv("USE_FLASH_ATTENTION").lower() == "false":
+    if torch.cuda.is_available() and not os.getenv("USE_FLASH_ATTENTION", "").lower() == "false":
         major, minor = torch.cuda.get_device_capability()
         is_sm75 = major == 7 and minor == 5
         is_sm8x = major == 8 and minor >= 0