From 8f28011e1e7e1367b0a6f4c0a4f3ff73e859ef23 Mon Sep 17 00:00:00 2001 From: OlivierDehaene <23298448+OlivierDehaene@users.noreply.github.com> Date: Tue, 30 May 2023 15:53:20 +0200 Subject: [PATCH] add integration tests --- integration-tests/conftest.py | 16 +- .../test_flash_falcon/test_flash_falcon.json | 378 ++++ .../test_flash_falcon_all_params.json | 98 ++ .../test_flash_falcon_load.json | 1514 +++++++++++++++++ integration-tests/models/test_flash_falcon.py | 63 + .../text_generation_server/models/flash_rw.py | 6 +- 6 files changed, 2070 insertions(+), 5 deletions(-) create mode 100644 integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon.json create mode 100644 integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_all_params.json create mode 100644 integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_load.json create mode 100644 integration-tests/models/test_flash_falcon.py diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index 7db12424..902a7158 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -205,7 +205,10 @@ def event_loop(): def launcher(event_loop): @contextlib.contextmanager def local_launcher( - model_id: str, num_shard: Optional[int] = None, quantize: Optional[str] = None + model_id: str, + num_shard: Optional[int] = None, + quantize: Optional[str] = None, + trust_remote_code: bool = False, ): port = random.randint(8000, 10_000) master_port = random.randint(10_000, 20_000) @@ -230,6 +233,9 @@ def launcher(event_loop): args.extend(["--num-shard", str(num_shard)]) if quantize: args.append("--quantize") + args.append("bitsandbytes") + if trust_remote_code: + args.append("--trust-remote-code") env = os.environ env["LOG_LEVEL"] = "info,text_generation_router=debug" @@ -250,7 +256,10 @@ def launcher(event_loop): @contextlib.contextmanager def docker_launcher( - model_id: str, num_shard: Optional[int] = None, quantize: Optional[str] = None + model_id: str, + num_shard: Optional[int] = None, + quantize: Optional[str] = None, + trust_remote_code: bool = False, ): port = random.randint(8000, 10_000) @@ -260,6 +269,9 @@ def launcher(event_loop): args.extend(["--num-shard", str(num_shard)]) if quantize: args.append("--quantize") + args.append("bitsandbytes") + if trust_remote_code: + args.append("--trust-remote-code") client = docker.from_env() diff --git a/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon.json b/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon.json new file mode 100644 index 00000000..488f3de3 --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon.json @@ -0,0 +1,378 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50, + "logprob": null, + "text": "G" + }, + { + "id": 330, + "logprob": -5.96875, + "text": "ir" + }, + { + "id": 1622, + "logprob": -5.6132812, + "text": "af" + }, + { + "id": 249, + "logprob": -6.5039062, + "text": "at" + }, + { + "id": 1480, + "logprob": -8.078125, + "text": "ron" + }, + { + "id": 304, + "logprob": -2.3261719, + "text": " is" + }, + { + "id": 23866, + "logprob": -9.59375, + "text": " obsessed" + }, + { + "id": 335, + "logprob": -0.048339844, + "text": " with" + }, + { + "id": 26680, + "logprob": -4.0, + "text": " gir" + }, + { + "id": 1903, + "logprob": -0.07556152, + "text": "aff" + }, + { + "id": 255, + "logprob": -0.0067749023, + "text": "es" + }, + { + "id": 23, + "logprob": -1.546875, + "text": "," + }, + { + "id": 248, + "logprob": -4.3320312, + "text": " the" + }, + { + "id": 758, + "logprob": -3.734375, + "text": " most" + }, + { + "id": 21735, + "logprob": -5.109375, + "text": " glorious" + }, + { + "id": 5985, + "logprob": -2.09375, + "text": " animal" + }, + { + "id": 313, + "logprob": -1.1835938, + "text": " on" + }, + { + "id": 248, + "logprob": -0.77685547, + "text": " the" + }, + { + "id": 1936, + "logprob": -2.3828125, + "text": " face" + }, + { + "id": 275, + "logprob": -0.004432678, + "text": " of" + }, + { + "id": 414, + "logprob": -1.9677734, + "text": " this" + }, + { + "id": 6490, + "logprob": -2.046875, + "text": " Earth" + }, + { + "id": 25, + "logprob": -0.28198242, + "text": "." + }, + { + "id": 401, + "logprob": -7.9179688, + "text": " G" + }, + { + "id": 6013, + "logprob": -2.2753906, + "text": "ira" + }, + { + "id": 694, + "logprob": -0.6230469, + "text": "ft" + }, + { + "id": 1480, + "logprob": -0.20874023, + "text": "ron" + }, + { + "id": 9369, + "logprob": -4.5507812, + "text": " believes" + }, + { + "id": 455, + "logprob": -4.5664062, + "text": " all" + }, + { + "id": 599, + "logprob": -2.7402344, + "text": " other" + }, + { + "id": 5632, + "logprob": -0.21948242, + "text": " animals" + }, + { + "id": 362, + "logprob": -0.7675781, + "text": " are" + }, + { + "id": 23981, + "logprob": -5.0, + "text": " irrelevant" + }, + { + "id": 635, + "logprob": -4.234375, + "text": " when" + }, + { + "id": 4354, + "logprob": -0.5131836, + "text": " compared" + }, + { + "id": 271, + "logprob": -0.103637695, + "text": " to" + }, + { + "id": 248, + "logprob": -0.58447266, + "text": " the" + }, + { + "id": 21735, + "logprob": -3.6835938, + "text": " glorious" + }, + { + "id": 64398, + "logprob": -1.8173828, + "text": " majesty" + }, + { + "id": 275, + "logprob": -0.23510742, + "text": " of" + }, + { + "id": 248, + "logprob": -0.35473633, + "text": " the" + }, + { + "id": 26680, + "logprob": -0.24633789, + "text": " gir" + }, + { + "id": 23226, + "logprob": -0.02960205, + "text": "affe" + }, + { + "id": 25, + "logprob": -0.17333984, + "text": "." + }, + { + "id": 193, + "logprob": -1.3935547, + "text": "\n" + }, + { + "id": 23626, + "logprob": -10.0625, + "text": "Daniel" + }, + { + "id": 37, + "logprob": -4.59375, + "text": ":" + }, + { + "id": 23090, + "logprob": -6.9375, + "text": " Hello" + }, + { + "id": 23, + "logprob": -0.99365234, + "text": "," + }, + { + "id": 29033, + "logprob": -2.2324219, + "text": " Gir" + }, + { + "id": 1622, + "logprob": -0.10809326, + "text": "af" + }, + { + "id": 249, + "logprob": -0.042663574, + "text": "at" + }, + { + "id": 1480, + "logprob": -0.0024776459, + "text": "ron" + }, + { + "id": 12, + "logprob": -1.4277344, + "text": "!" + }, + { + "id": 193, + "logprob": -1.1015625, + "text": "\n" + }, + { + "id": 50, + "logprob": -0.05709839, + "text": "G" + }, + { + "id": 330, + "logprob": -0.13208008, + "text": "ir" + }, + { + "id": 1622, + "logprob": -0.0071487427, + "text": "af" + }, + { + "id": 249, + "logprob": -0.008468628, + "text": "at" + }, + { + "id": 1480, + "logprob": -0.00068998337, + "text": "ron" + }, + { + "id": 37, + "logprob": -0.0074691772, + "text": ":" + } + ], + "seed": null, + "tokens": [ + { + "id": 23090, + "logprob": -1.8251953, + "special": false, + "text": " Hello" + }, + { + "id": 23, + "logprob": -0.3173828, + "special": false, + "text": "," + }, + { + "id": 8156, + "logprob": -0.23803711, + "special": false, + "text": " Daniel" + }, + { + "id": 12, + "logprob": -0.56933594, + "special": false, + "text": "!" + }, + { + "id": 193, + "logprob": -0.61279297, + "special": false, + "text": "\n" + }, + { + "id": 23626, + "logprob": -0.41967773, + "special": false, + "text": "Daniel" + }, + { + "id": 37, + "logprob": -0.0023403168, + "special": false, + "text": ":" + }, + { + "id": 1634, + "logprob": -2.0605469, + "special": false, + "text": " What" + }, + { + "id": 18, + "logprob": -1.5292969, + "special": false, + "text": "'" + }, + { + "id": 94, + "logprob": -0.007904053, + "special": false, + "text": "s" + } + ] + }, + "generated_text": " Hello, Daniel!\nDaniel: What's" +} diff --git a/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_all_params.json b/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_all_params.json new file mode 100644 index 00000000..07f996bc --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_all_params.json @@ -0,0 +1,98 @@ +{ + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 330, + "logprob": null, + "text": "ir" + }, + { + "id": 1622, + "logprob": -7.8125, + "text": "af" + }, + { + "id": 249, + "logprob": -4.5, + "text": "at" + }, + { + "id": 1480, + "logprob": -10.875, + "text": "ron" + }, + { + "id": 37, + "logprob": -3.6875, + "text": ":" + } + ], + "seed": 0, + "tokens": [ + { + "id": 836, + "logprob": -1.265625, + "special": false, + "text": " i" + }, + { + "id": 18, + "logprob": -0.11621094, + "special": false, + "text": "'" + }, + { + "id": 88, + "logprob": 0.0, + "special": false, + "text": "m" + }, + { + "id": 1241, + "logprob": -0.953125, + "special": false, + "text": " using" + }, + { + "id": 248, + "logprob": -2.5, + "special": false, + "text": " the" + }, + { + "id": 204, + "logprob": -0.62890625, + "special": false, + "text": " " + }, + { + "id": 2485, + "logprob": -0.54296875, + "special": false, + "text": "32" + }, + { + "id": 24, + "logprob": 0.0, + "special": false, + "text": "-" + }, + { + "id": 3882, + "logprob": 0.0, + "special": false, + "text": "bit" + }, + { + "id": 2684, + "logprob": 0.0, + "special": false, + "text": " version" + } + ] + }, + "generated_text": "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron: i'm using the 32-bit version" +} diff --git a/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_load.json b/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_load.json new file mode 100644 index 00000000..675b060c --- /dev/null +++ b/integration-tests/models/__snapshots__/test_flash_falcon/test_flash_falcon_load.json @@ -0,0 +1,1514 @@ +[ + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50, + "logprob": null, + "text": "G" + }, + { + "id": 330, + "logprob": -6.0234375, + "text": "ir" + }, + { + "id": 1622, + "logprob": -5.6132812, + "text": "af" + }, + { + "id": 249, + "logprob": -6.4960938, + "text": "at" + }, + { + "id": 1480, + "logprob": -8.0703125, + "text": "ron" + }, + { + "id": 304, + "logprob": -2.328125, + "text": " is" + }, + { + "id": 23866, + "logprob": -9.59375, + "text": " obsessed" + }, + { + "id": 335, + "logprob": -0.048431396, + "text": " with" + }, + { + "id": 26680, + "logprob": -4.0, + "text": " gir" + }, + { + "id": 1903, + "logprob": -0.07519531, + "text": "aff" + }, + { + "id": 255, + "logprob": -0.0067977905, + "text": "es" + }, + { + "id": 23, + "logprob": -1.546875, + "text": "," + }, + { + "id": 248, + "logprob": -4.3320312, + "text": " the" + }, + { + "id": 758, + "logprob": -3.7324219, + "text": " most" + }, + { + "id": 21735, + "logprob": -5.109375, + "text": " glorious" + }, + { + "id": 5985, + "logprob": -2.0917969, + "text": " animal" + }, + { + "id": 313, + "logprob": -1.1835938, + "text": " on" + }, + { + "id": 248, + "logprob": -0.77685547, + "text": " the" + }, + { + "id": 1936, + "logprob": -2.3808594, + "text": " face" + }, + { + "id": 275, + "logprob": -0.0044403076, + "text": " of" + }, + { + "id": 414, + "logprob": -1.9658203, + "text": " this" + }, + { + "id": 6490, + "logprob": -2.0488281, + "text": " Earth" + }, + { + "id": 25, + "logprob": -0.28198242, + "text": "." + }, + { + "id": 401, + "logprob": -7.921875, + "text": " G" + }, + { + "id": 6013, + "logprob": -2.2714844, + "text": "ira" + }, + { + "id": 694, + "logprob": -0.62402344, + "text": "ft" + }, + { + "id": 1480, + "logprob": -0.20812988, + "text": "ron" + }, + { + "id": 9369, + "logprob": -4.5507812, + "text": " believes" + }, + { + "id": 455, + "logprob": -4.5664062, + "text": " all" + }, + { + "id": 599, + "logprob": -2.7402344, + "text": " other" + }, + { + "id": 5632, + "logprob": -0.21899414, + "text": " animals" + }, + { + "id": 362, + "logprob": -0.76708984, + "text": " are" + }, + { + "id": 23981, + "logprob": -4.9960938, + "text": " irrelevant" + }, + { + "id": 635, + "logprob": -4.2304688, + "text": " when" + }, + { + "id": 4354, + "logprob": -0.51416016, + "text": " compared" + }, + { + "id": 271, + "logprob": -0.103393555, + "text": " to" + }, + { + "id": 248, + "logprob": -0.5839844, + "text": " the" + }, + { + "id": 21735, + "logprob": -3.6796875, + "text": " glorious" + }, + { + "id": 64398, + "logprob": -1.8203125, + "text": " majesty" + }, + { + "id": 275, + "logprob": -0.23547363, + "text": " of" + }, + { + "id": 248, + "logprob": -0.35424805, + "text": " the" + }, + { + "id": 26680, + "logprob": -0.24621582, + "text": " gir" + }, + { + "id": 23226, + "logprob": -0.02947998, + "text": "affe" + }, + { + "id": 25, + "logprob": -0.17407227, + "text": "." + }, + { + "id": 193, + "logprob": -1.3935547, + "text": "\n" + }, + { + "id": 23626, + "logprob": -10.0703125, + "text": "Daniel" + }, + { + "id": 37, + "logprob": -4.5976562, + "text": ":" + }, + { + "id": 23090, + "logprob": -6.9375, + "text": " Hello" + }, + { + "id": 23, + "logprob": -0.9941406, + "text": "," + }, + { + "id": 29033, + "logprob": -2.2285156, + "text": " Gir" + }, + { + "id": 1622, + "logprob": -0.10797119, + "text": "af" + }, + { + "id": 249, + "logprob": -0.04260254, + "text": "at" + }, + { + "id": 1480, + "logprob": -0.0024776459, + "text": "ron" + }, + { + "id": 12, + "logprob": -1.4277344, + "text": "!" + }, + { + "id": 193, + "logprob": -1.1005859, + "text": "\n" + }, + { + "id": 50, + "logprob": -0.0569458, + "text": "G" + }, + { + "id": 330, + "logprob": -0.13171387, + "text": "ir" + }, + { + "id": 1622, + "logprob": -0.007133484, + "text": "af" + }, + { + "id": 249, + "logprob": -0.008453369, + "text": "at" + }, + { + "id": 1480, + "logprob": -0.0006904602, + "text": "ron" + }, + { + "id": 37, + "logprob": -0.007511139, + "text": ":" + } + ], + "seed": null, + "tokens": [ + { + "id": 23090, + "logprob": -1.8242188, + "special": false, + "text": " Hello" + }, + { + "id": 23, + "logprob": -0.3173828, + "special": false, + "text": "," + }, + { + "id": 8156, + "logprob": -0.23852539, + "special": false, + "text": " Daniel" + }, + { + "id": 12, + "logprob": -0.5703125, + "special": false, + "text": "!" + }, + { + "id": 193, + "logprob": -0.61328125, + "special": false, + "text": "\n" + }, + { + "id": 23626, + "logprob": -0.42114258, + "special": false, + "text": "Daniel" + }, + { + "id": 37, + "logprob": -0.0023345947, + "special": false, + "text": ":" + }, + { + "id": 1634, + "logprob": -2.0605469, + "special": false, + "text": " What" + }, + { + "id": 18, + "logprob": -1.5302734, + "special": false, + "text": "'" + }, + { + "id": 94, + "logprob": -0.00793457, + "special": false, + "text": "s" + } + ] + }, + "generated_text": " Hello, Daniel!\nDaniel: What's" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50, + "logprob": null, + "text": "G" + }, + { + "id": 330, + "logprob": -6.0234375, + "text": "ir" + }, + { + "id": 1622, + "logprob": -5.6132812, + "text": "af" + }, + { + "id": 249, + "logprob": -6.4960938, + "text": "at" + }, + { + "id": 1480, + "logprob": -8.0703125, + "text": "ron" + }, + { + "id": 304, + "logprob": -2.328125, + "text": " is" + }, + { + "id": 23866, + "logprob": -9.59375, + "text": " obsessed" + }, + { + "id": 335, + "logprob": -0.048431396, + "text": " with" + }, + { + "id": 26680, + "logprob": -4.0, + "text": " gir" + }, + { + "id": 1903, + "logprob": -0.07519531, + "text": "aff" + }, + { + "id": 255, + "logprob": -0.0067977905, + "text": "es" + }, + { + "id": 23, + "logprob": -1.546875, + "text": "," + }, + { + "id": 248, + "logprob": -4.3320312, + "text": " the" + }, + { + "id": 758, + "logprob": -3.7324219, + "text": " most" + }, + { + "id": 21735, + "logprob": -5.109375, + "text": " glorious" + }, + { + "id": 5985, + "logprob": -2.0917969, + "text": " animal" + }, + { + "id": 313, + "logprob": -1.1835938, + "text": " on" + }, + { + "id": 248, + "logprob": -0.77685547, + "text": " the" + }, + { + "id": 1936, + "logprob": -2.3808594, + "text": " face" + }, + { + "id": 275, + "logprob": -0.0044403076, + "text": " of" + }, + { + "id": 414, + "logprob": -1.9658203, + "text": " this" + }, + { + "id": 6490, + "logprob": -2.0488281, + "text": " Earth" + }, + { + "id": 25, + "logprob": -0.28198242, + "text": "." + }, + { + "id": 401, + "logprob": -7.921875, + "text": " G" + }, + { + "id": 6013, + "logprob": -2.2714844, + "text": "ira" + }, + { + "id": 694, + "logprob": -0.62402344, + "text": "ft" + }, + { + "id": 1480, + "logprob": -0.20812988, + "text": "ron" + }, + { + "id": 9369, + "logprob": -4.5507812, + "text": " believes" + }, + { + "id": 455, + "logprob": -4.5664062, + "text": " all" + }, + { + "id": 599, + "logprob": -2.7402344, + "text": " other" + }, + { + "id": 5632, + "logprob": -0.21899414, + "text": " animals" + }, + { + "id": 362, + "logprob": -0.76708984, + "text": " are" + }, + { + "id": 23981, + "logprob": -4.9960938, + "text": " irrelevant" + }, + { + "id": 635, + "logprob": -4.2304688, + "text": " when" + }, + { + "id": 4354, + "logprob": -0.51416016, + "text": " compared" + }, + { + "id": 271, + "logprob": -0.103393555, + "text": " to" + }, + { + "id": 248, + "logprob": -0.5839844, + "text": " the" + }, + { + "id": 21735, + "logprob": -3.6796875, + "text": " glorious" + }, + { + "id": 64398, + "logprob": -1.8203125, + "text": " majesty" + }, + { + "id": 275, + "logprob": -0.23547363, + "text": " of" + }, + { + "id": 248, + "logprob": -0.35424805, + "text": " the" + }, + { + "id": 26680, + "logprob": -0.24621582, + "text": " gir" + }, + { + "id": 23226, + "logprob": -0.02947998, + "text": "affe" + }, + { + "id": 25, + "logprob": -0.17407227, + "text": "." + }, + { + "id": 193, + "logprob": -1.3935547, + "text": "\n" + }, + { + "id": 23626, + "logprob": -10.0703125, + "text": "Daniel" + }, + { + "id": 37, + "logprob": -4.5976562, + "text": ":" + }, + { + "id": 23090, + "logprob": -6.9375, + "text": " Hello" + }, + { + "id": 23, + "logprob": -0.9941406, + "text": "," + }, + { + "id": 29033, + "logprob": -2.2285156, + "text": " Gir" + }, + { + "id": 1622, + "logprob": -0.10797119, + "text": "af" + }, + { + "id": 249, + "logprob": -0.04260254, + "text": "at" + }, + { + "id": 1480, + "logprob": -0.0024776459, + "text": "ron" + }, + { + "id": 12, + "logprob": -1.4277344, + "text": "!" + }, + { + "id": 193, + "logprob": -1.1005859, + "text": "\n" + }, + { + "id": 50, + "logprob": -0.0569458, + "text": "G" + }, + { + "id": 330, + "logprob": -0.13171387, + "text": "ir" + }, + { + "id": 1622, + "logprob": -0.007133484, + "text": "af" + }, + { + "id": 249, + "logprob": -0.008453369, + "text": "at" + }, + { + "id": 1480, + "logprob": -0.0006904602, + "text": "ron" + }, + { + "id": 37, + "logprob": -0.007511139, + "text": ":" + } + ], + "seed": null, + "tokens": [ + { + "id": 23090, + "logprob": -1.8242188, + "special": false, + "text": " Hello" + }, + { + "id": 23, + "logprob": -0.3173828, + "special": false, + "text": "," + }, + { + "id": 8156, + "logprob": -0.23852539, + "special": false, + "text": " Daniel" + }, + { + "id": 12, + "logprob": -0.5703125, + "special": false, + "text": "!" + }, + { + "id": 193, + "logprob": -0.61328125, + "special": false, + "text": "\n" + }, + { + "id": 23626, + "logprob": -0.42114258, + "special": false, + "text": "Daniel" + }, + { + "id": 37, + "logprob": -0.0023345947, + "special": false, + "text": ":" + }, + { + "id": 1634, + "logprob": -2.0605469, + "special": false, + "text": " What" + }, + { + "id": 18, + "logprob": -1.5302734, + "special": false, + "text": "'" + }, + { + "id": 94, + "logprob": -0.00793457, + "special": false, + "text": "s" + } + ] + }, + "generated_text": " Hello, Daniel!\nDaniel: What's" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50, + "logprob": null, + "text": "G" + }, + { + "id": 330, + "logprob": -6.0234375, + "text": "ir" + }, + { + "id": 1622, + "logprob": -5.6132812, + "text": "af" + }, + { + "id": 249, + "logprob": -6.4960938, + "text": "at" + }, + { + "id": 1480, + "logprob": -8.0703125, + "text": "ron" + }, + { + "id": 304, + "logprob": -2.328125, + "text": " is" + }, + { + "id": 23866, + "logprob": -9.59375, + "text": " obsessed" + }, + { + "id": 335, + "logprob": -0.048431396, + "text": " with" + }, + { + "id": 26680, + "logprob": -4.0, + "text": " gir" + }, + { + "id": 1903, + "logprob": -0.07519531, + "text": "aff" + }, + { + "id": 255, + "logprob": -0.0067977905, + "text": "es" + }, + { + "id": 23, + "logprob": -1.546875, + "text": "," + }, + { + "id": 248, + "logprob": -4.3320312, + "text": " the" + }, + { + "id": 758, + "logprob": -3.7324219, + "text": " most" + }, + { + "id": 21735, + "logprob": -5.109375, + "text": " glorious" + }, + { + "id": 5985, + "logprob": -2.0917969, + "text": " animal" + }, + { + "id": 313, + "logprob": -1.1835938, + "text": " on" + }, + { + "id": 248, + "logprob": -0.77685547, + "text": " the" + }, + { + "id": 1936, + "logprob": -2.3808594, + "text": " face" + }, + { + "id": 275, + "logprob": -0.0044403076, + "text": " of" + }, + { + "id": 414, + "logprob": -1.9658203, + "text": " this" + }, + { + "id": 6490, + "logprob": -2.0488281, + "text": " Earth" + }, + { + "id": 25, + "logprob": -0.28198242, + "text": "." + }, + { + "id": 401, + "logprob": -7.921875, + "text": " G" + }, + { + "id": 6013, + "logprob": -2.2714844, + "text": "ira" + }, + { + "id": 694, + "logprob": -0.62402344, + "text": "ft" + }, + { + "id": 1480, + "logprob": -0.20812988, + "text": "ron" + }, + { + "id": 9369, + "logprob": -4.5507812, + "text": " believes" + }, + { + "id": 455, + "logprob": -4.5664062, + "text": " all" + }, + { + "id": 599, + "logprob": -2.7402344, + "text": " other" + }, + { + "id": 5632, + "logprob": -0.21899414, + "text": " animals" + }, + { + "id": 362, + "logprob": -0.76708984, + "text": " are" + }, + { + "id": 23981, + "logprob": -4.9960938, + "text": " irrelevant" + }, + { + "id": 635, + "logprob": -4.2304688, + "text": " when" + }, + { + "id": 4354, + "logprob": -0.51416016, + "text": " compared" + }, + { + "id": 271, + "logprob": -0.103393555, + "text": " to" + }, + { + "id": 248, + "logprob": -0.5839844, + "text": " the" + }, + { + "id": 21735, + "logprob": -3.6796875, + "text": " glorious" + }, + { + "id": 64398, + "logprob": -1.8203125, + "text": " majesty" + }, + { + "id": 275, + "logprob": -0.23547363, + "text": " of" + }, + { + "id": 248, + "logprob": -0.35424805, + "text": " the" + }, + { + "id": 26680, + "logprob": -0.24621582, + "text": " gir" + }, + { + "id": 23226, + "logprob": -0.02947998, + "text": "affe" + }, + { + "id": 25, + "logprob": -0.17407227, + "text": "." + }, + { + "id": 193, + "logprob": -1.3935547, + "text": "\n" + }, + { + "id": 23626, + "logprob": -10.0703125, + "text": "Daniel" + }, + { + "id": 37, + "logprob": -4.5976562, + "text": ":" + }, + { + "id": 23090, + "logprob": -6.9375, + "text": " Hello" + }, + { + "id": 23, + "logprob": -0.9941406, + "text": "," + }, + { + "id": 29033, + "logprob": -2.2285156, + "text": " Gir" + }, + { + "id": 1622, + "logprob": -0.10797119, + "text": "af" + }, + { + "id": 249, + "logprob": -0.04260254, + "text": "at" + }, + { + "id": 1480, + "logprob": -0.0024776459, + "text": "ron" + }, + { + "id": 12, + "logprob": -1.4277344, + "text": "!" + }, + { + "id": 193, + "logprob": -1.1005859, + "text": "\n" + }, + { + "id": 50, + "logprob": -0.0569458, + "text": "G" + }, + { + "id": 330, + "logprob": -0.13171387, + "text": "ir" + }, + { + "id": 1622, + "logprob": -0.007133484, + "text": "af" + }, + { + "id": 249, + "logprob": -0.008453369, + "text": "at" + }, + { + "id": 1480, + "logprob": -0.0006904602, + "text": "ron" + }, + { + "id": 37, + "logprob": -0.007511139, + "text": ":" + } + ], + "seed": null, + "tokens": [ + { + "id": 23090, + "logprob": -1.8242188, + "special": false, + "text": " Hello" + }, + { + "id": 23, + "logprob": -0.3173828, + "special": false, + "text": "," + }, + { + "id": 8156, + "logprob": -0.23852539, + "special": false, + "text": " Daniel" + }, + { + "id": 12, + "logprob": -0.5703125, + "special": false, + "text": "!" + }, + { + "id": 193, + "logprob": -0.61328125, + "special": false, + "text": "\n" + }, + { + "id": 23626, + "logprob": -0.42114258, + "special": false, + "text": "Daniel" + }, + { + "id": 37, + "logprob": -0.0023345947, + "special": false, + "text": ":" + }, + { + "id": 1634, + "logprob": -2.0605469, + "special": false, + "text": " What" + }, + { + "id": 18, + "logprob": -1.5302734, + "special": false, + "text": "'" + }, + { + "id": 94, + "logprob": -0.00793457, + "special": false, + "text": "s" + } + ] + }, + "generated_text": " Hello, Daniel!\nDaniel: What's" + }, + { + "details": { + "best_of_sequences": null, + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [ + { + "id": 50, + "logprob": null, + "text": "G" + }, + { + "id": 330, + "logprob": -6.0234375, + "text": "ir" + }, + { + "id": 1622, + "logprob": -5.6132812, + "text": "af" + }, + { + "id": 249, + "logprob": -6.4960938, + "text": "at" + }, + { + "id": 1480, + "logprob": -8.0703125, + "text": "ron" + }, + { + "id": 304, + "logprob": -2.328125, + "text": " is" + }, + { + "id": 23866, + "logprob": -9.59375, + "text": " obsessed" + }, + { + "id": 335, + "logprob": -0.048431396, + "text": " with" + }, + { + "id": 26680, + "logprob": -4.0, + "text": " gir" + }, + { + "id": 1903, + "logprob": -0.07519531, + "text": "aff" + }, + { + "id": 255, + "logprob": -0.0067977905, + "text": "es" + }, + { + "id": 23, + "logprob": -1.546875, + "text": "," + }, + { + "id": 248, + "logprob": -4.3320312, + "text": " the" + }, + { + "id": 758, + "logprob": -3.7324219, + "text": " most" + }, + { + "id": 21735, + "logprob": -5.109375, + "text": " glorious" + }, + { + "id": 5985, + "logprob": -2.0917969, + "text": " animal" + }, + { + "id": 313, + "logprob": -1.1835938, + "text": " on" + }, + { + "id": 248, + "logprob": -0.77685547, + "text": " the" + }, + { + "id": 1936, + "logprob": -2.3808594, + "text": " face" + }, + { + "id": 275, + "logprob": -0.0044403076, + "text": " of" + }, + { + "id": 414, + "logprob": -1.9658203, + "text": " this" + }, + { + "id": 6490, + "logprob": -2.0488281, + "text": " Earth" + }, + { + "id": 25, + "logprob": -0.28198242, + "text": "." + }, + { + "id": 401, + "logprob": -7.921875, + "text": " G" + }, + { + "id": 6013, + "logprob": -2.2714844, + "text": "ira" + }, + { + "id": 694, + "logprob": -0.62402344, + "text": "ft" + }, + { + "id": 1480, + "logprob": -0.20812988, + "text": "ron" + }, + { + "id": 9369, + "logprob": -4.5507812, + "text": " believes" + }, + { + "id": 455, + "logprob": -4.5664062, + "text": " all" + }, + { + "id": 599, + "logprob": -2.7402344, + "text": " other" + }, + { + "id": 5632, + "logprob": -0.21899414, + "text": " animals" + }, + { + "id": 362, + "logprob": -0.76708984, + "text": " are" + }, + { + "id": 23981, + "logprob": -4.9960938, + "text": " irrelevant" + }, + { + "id": 635, + "logprob": -4.2304688, + "text": " when" + }, + { + "id": 4354, + "logprob": -0.51416016, + "text": " compared" + }, + { + "id": 271, + "logprob": -0.103393555, + "text": " to" + }, + { + "id": 248, + "logprob": -0.5839844, + "text": " the" + }, + { + "id": 21735, + "logprob": -3.6796875, + "text": " glorious" + }, + { + "id": 64398, + "logprob": -1.8203125, + "text": " majesty" + }, + { + "id": 275, + "logprob": -0.23547363, + "text": " of" + }, + { + "id": 248, + "logprob": -0.35424805, + "text": " the" + }, + { + "id": 26680, + "logprob": -0.24621582, + "text": " gir" + }, + { + "id": 23226, + "logprob": -0.02947998, + "text": "affe" + }, + { + "id": 25, + "logprob": -0.17407227, + "text": "." + }, + { + "id": 193, + "logprob": -1.3935547, + "text": "\n" + }, + { + "id": 23626, + "logprob": -10.0703125, + "text": "Daniel" + }, + { + "id": 37, + "logprob": -4.5976562, + "text": ":" + }, + { + "id": 23090, + "logprob": -6.9375, + "text": " Hello" + }, + { + "id": 23, + "logprob": -0.9941406, + "text": "," + }, + { + "id": 29033, + "logprob": -2.2285156, + "text": " Gir" + }, + { + "id": 1622, + "logprob": -0.10797119, + "text": "af" + }, + { + "id": 249, + "logprob": -0.04260254, + "text": "at" + }, + { + "id": 1480, + "logprob": -0.0024776459, + "text": "ron" + }, + { + "id": 12, + "logprob": -1.4277344, + "text": "!" + }, + { + "id": 193, + "logprob": -1.1005859, + "text": "\n" + }, + { + "id": 50, + "logprob": -0.0569458, + "text": "G" + }, + { + "id": 330, + "logprob": -0.13171387, + "text": "ir" + }, + { + "id": 1622, + "logprob": -0.007133484, + "text": "af" + }, + { + "id": 249, + "logprob": -0.008453369, + "text": "at" + }, + { + "id": 1480, + "logprob": -0.0006904602, + "text": "ron" + }, + { + "id": 37, + "logprob": -0.007511139, + "text": ":" + } + ], + "seed": null, + "tokens": [ + { + "id": 23090, + "logprob": -1.8242188, + "special": false, + "text": " Hello" + }, + { + "id": 23, + "logprob": -0.3173828, + "special": false, + "text": "," + }, + { + "id": 8156, + "logprob": -0.23852539, + "special": false, + "text": " Daniel" + }, + { + "id": 12, + "logprob": -0.5703125, + "special": false, + "text": "!" + }, + { + "id": 193, + "logprob": -0.61328125, + "special": false, + "text": "\n" + }, + { + "id": 23626, + "logprob": -0.42114258, + "special": false, + "text": "Daniel" + }, + { + "id": 37, + "logprob": -0.0023345947, + "special": false, + "text": ":" + }, + { + "id": 1634, + "logprob": -2.0605469, + "special": false, + "text": " What" + }, + { + "id": 18, + "logprob": -1.5302734, + "special": false, + "text": "'" + }, + { + "id": 94, + "logprob": -0.00793457, + "special": false, + "text": "s" + } + ] + }, + "generated_text": " Hello, Daniel!\nDaniel: What's" + } +] diff --git a/integration-tests/models/test_flash_falcon.py b/integration-tests/models/test_flash_falcon.py new file mode 100644 index 00000000..ce27731d --- /dev/null +++ b/integration-tests/models/test_flash_falcon.py @@ -0,0 +1,63 @@ +import pytest + + +@pytest.fixture(scope="module") +def flash_falcon_handle(launcher): + with launcher("tiiuae/falcon-7b", trust_remote_code=True) as handle: + yield handle + + +@pytest.fixture(scope="module") +async def flash_falcon(flash_falcon_handle): + await flash_falcon_handle.health(120) + return flash_falcon_handle.client + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_falcon(flash_falcon, response_snapshot): + response = await flash_falcon.generate( + "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:", + max_new_tokens=10, + ) + + assert response.details.generated_tokens == 10 + assert response == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_falcon_all_params(flash_falcon, response_snapshot): + response = await flash_falcon.generate( + "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:", + max_new_tokens=10, + repetition_penalty=1.2, + return_full_text=True, + stop_sequences=["test"], + temperature=0.5, + top_p=0.9, + top_k=10, + truncate=5, + typical_p=0.9, + watermark=True, + seed=0, + ) + + assert response.details.generated_tokens == 10 + assert response == response_snapshot + + +@pytest.mark.asyncio +@pytest.mark.private +async def test_flash_falcon_load(flash_falcon, generate_load, response_snapshot): + responses = await generate_load( + flash_falcon, + "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:", + max_new_tokens=10, + n=4, + ) + + assert len(responses) == 4 + assert all([r.generated_text == responses[0].generated_text for r in responses]) + + assert responses == response_snapshot diff --git a/server/text_generation_server/models/flash_rw.py b/server/text_generation_server/models/flash_rw.py index 8219ac86..44915ff5 100644 --- a/server/text_generation_server/models/flash_rw.py +++ b/server/text_generation_server/models/flash_rw.py @@ -37,7 +37,7 @@ class FlashRW(FlashCausalLM): ): if torch.cuda.is_available(): device = torch.device("cuda") - dtype = torch.float16 + dtype = torch.bfloat16 else: raise NotImplementedError("RW is only available on GPU") @@ -54,7 +54,7 @@ class FlashRW(FlashCausalLM): revision=revision, ) - # We do not use from_pretrained as we modified the model internal module layout + # We do not use from_pretrained as it is too slow try: filenames = weight_files(model_id, revision, ".bin") # Local files not found @@ -124,7 +124,7 @@ class FlashRWSharded(FlashRW): self.process_group, rank, world_size = initialize_torch_distributed() if torch.cuda.is_available(): device = torch.device(f"cuda:{rank}") - dtype = torch.float16 + dtype = torch.bfloat16 else: raise NotImplementedError("FlashRW is only available on GPU")