From affef3276ed3e4a429a2ee37591dead6593c9bd9 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 1 May 2024 11:42:51 +0200
Subject: [PATCH] Adding orca script.

---
 load_tests/Makefile          |  5 ++-
 load_tests/orca.py           | 27 ++++++++++++++++
 load_tests/starcoder_load.js | 63 ------------------------------------
 3 files changed, 31 insertions(+), 64 deletions(-)
 create mode 100644 load_tests/orca.py
 delete mode 100644 load_tests/starcoder_load.js

diff --git a/load_tests/Makefile b/load_tests/Makefile
index 81d16b87..9199aa3b 100644
--- a/load_tests/Makefile
+++ b/load_tests/Makefile
@@ -2,5 +2,8 @@
 ShareGPT_V3_unfiltered_cleaned_split.json:
 	wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
 
-prepare_data: ShareGPT_V3_unfiltered_cleaned_split.json
+prepare_share: ShareGPT_V3_unfiltered_cleaned_split.json
 	python filter.py
+
+prepare_orca:
+	python orca.py
diff --git a/load_tests/orca.py b/load_tests/orca.py
new file mode 100644
index 00000000..e607d27c
--- /dev/null
+++ b/load_tests/orca.py
@@ -0,0 +1,27 @@
+import json
+import datasets
+import tqdm
+
+
+def main():
+    dataset = datasets.load_dataset("Open-Orca/OpenOrca", split="train")
+    # Keep only the first 2k prompts, each wrapped as a human-opened conversation.
+    max_samples = min(2000, len(dataset))
+    conversations = []
+    for item in tqdm.tqdm(dataset, total=max_samples):
+        conversation = {
+            "conversations": [
+                {"from": "human", "value": item["question"]},
+            ],
+            "id": item["id"],
+        }
+        conversations.append(conversation)
+        if len(conversations) >= max_samples:
+            break
+
+    with open("./small.json", "w") as f:
+        json.dump(conversations, f, indent=4)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/load_tests/starcoder_load.js b/load_tests/starcoder_load.js
deleted file mode 100644
index 2f6cb3d6..00000000
--- a/load_tests/starcoder_load.js
+++ /dev/null
@@ -1,63 +0,0 @@
-import {check} from 'k6';
-import http from 'k6/http';
-import {Trend} from 'k6/metrics';
-
-const host = __ENV.HOST || '127.0.0.1:3000';
-
-const totalTime = new Trend('total_time', true);
-const validationTime = new Trend('validation_time', true);
-const queueTime = new Trend('queue_time', true);
-const inferenceTime = new Trend('inference_time', true);
-const timePerToken = new Trend('time_per_token', true);
-
-const example = {
-    payload: JSON.stringify({
-        inputs: '# This is a fibonacci function written in the Python programming language.' +
-            'def fibonacci',
-        parameters: {
-            details: true,
-            max_new_tokens: 60,
-            temperature: 0.2,
-            top_p: 0.95,
-            seed: 0,
-        },
-    }),
-    generated_tokens: 60
-};
-
-export const options = {
-    thresholds: {
-        http_req_failed: ['rate==0'],
-        time_per_token: ['p(95)<90'],
-        queue_time: ['p(95)<1500'],
-    },
-    scenarios: {
-        load_test: {
-            executor: 'constant-arrival-rate',
-            duration: '60s',
-            preAllocatedVUs: 100,
-            rate: 10,
-            timeUnit: '1s',
-        },
-    },
-};
-
-export default function () {
-    const headers = {'Content-Type': 'application/json'};
-    const res = http.post(`http://${host}/generate`, example.payload, {
-        headers,
-    });
-
-    check(res, {
-        'Post status is 200': (r) => res.status === 200,
-        'Post response generated tokens': (r) => res.status === 200 && res.json().details.generated_tokens === example.generated_tokens,
-    });
-
-    if (res.status === 200) {
-        totalTime.add(res.headers["X-Total-Time"]);
-        validationTime.add(res.headers["X-Validation-Time"]);
-        queueTime.add(res.headers["X-Queue-Time"]);
-        inferenceTime.add(res.headers["X-Inference-Time"]);
-        timePerToken.add(res.headers["X-Time-Per-Token"]);
-    }
-}
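
---
Note, outside the diff: after `make prepare_orca` runs `python orca.py`, the
script should leave a `./small.json` capped at 2000 entries. A minimal sketch
of a sanity check for that output, assuming the file name and JSON keys from
orca.py above (the snippet itself is not an existing helper in this repo):

    import json

    # Load the file written by orca.py and check its shape.
    with open("./small.json") as f:
        conversations = json.load(f)

    # The script stops at 2000 items and opens every conversation with a human turn.
    assert len(conversations) <= 2000
    first = conversations[0]
    assert first["conversations"][0]["from"] == "human"
    print(first["id"], first["conversations"][0]["value"][:80])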