From affef3276ed3e4a429a2ee37591dead6593c9bd9 Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 1 May 2024 11:42:51 +0200
Subject: [PATCH] Adding orca script.

---
 load_tests/Makefile          |  5 ++-
 load_tests/orca.py           | 27 ++++++++++++++++
 load_tests/starcoder_load.js | 63 ------------------------------------
 3 files changed, 31 insertions(+), 64 deletions(-)
 create mode 100644 load_tests/orca.py
 delete mode 100644 load_tests/starcoder_load.js

diff --git a/load_tests/Makefile b/load_tests/Makefile
index 81d16b87..9199aa3b 100644
--- a/load_tests/Makefile
+++ b/load_tests/Makefile
@@ -2,5 +2,8 @@
 ShareGPT_V3_unfiltered_cleaned_split.json:
 	wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
 
-prepare_data: ShareGPT_V3_unfiltered_cleaned_split.json
+prepare_share: ShareGPT_V3_unfiltered_cleaned_split.json
 	python filter.py
+
+prepare_orca:
+	python orca.py
diff --git a/load_tests/orca.py b/load_tests/orca.py
new file mode 100644
index 00000000..e607d27c
--- /dev/null
+++ b/load_tests/orca.py
@@ -0,0 +1,27 @@
+import json
+import datasets
+import tqdm
+
+
+def main():
+    dataset = datasets.load_dataset("Open-Orca/OpenOrca", split="train")
+    # Keep only the first 2k prompts, each wrapped as a human-opened conversation.
+    max_samples = min(2000, len(dataset))
+    conversations = []
+    for item in tqdm.tqdm(dataset, total=max_samples):
+        conversation = {
+            "conversations": [
+                {"from": "human", "value": item["question"]},
+            ],
+            "id": item["id"],
+        }
+        conversations.append(conversation)
+        if len(conversations) >= max_samples:
+            break
+
+    with open("./small.json", "w") as f:
+        json.dump(conversations, f, indent=4)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/load_tests/starcoder_load.js b/load_tests/starcoder_load.js
deleted file mode 100644
index 2f6cb3d6..00000000
--- a/load_tests/starcoder_load.js
+++ /dev/null
@@ -1,63 +0,0 @@
-import {check} from 'k6';
-import http from 'k6/http';
-import {Trend} from 'k6/metrics';
-
-const host = __ENV.HOST || '127.0.0.1:3000';
-
-const totalTime = new Trend('total_time', true);
-const validationTime = new Trend('validation_time', true);
-const queueTime = new Trend('queue_time', true);
-const inferenceTime = new Trend('inference_time', true);
-const timePerToken = new Trend('time_per_token', true);
-
-const example = {
-    payload: JSON.stringify({
-        inputs: '# This is a fibonacci function written in the Python programming language.' +
-            'def fibonacci',
-        parameters: {
-            details: true,
-            max_new_tokens: 60,
-            temperature: 0.2,
-            top_p: 0.95,
-            seed: 0,
-        },
-    }),
-    generated_tokens: 60
-};
-
-export const options = {
-    thresholds: {
-        http_req_failed: ['rate==0'],
-        time_per_token: ['p(95)<90'],
-        queue_time: ['p(95)<1500'],
-    },
-    scenarios: {
-        load_test: {
-            executor: 'constant-arrival-rate',
-            duration: '60s',
-            preAllocatedVUs: 100,
-            rate: 10,
-            timeUnit: '1s',
-        },
-    },
-};
-
-export default function () {
-    const headers = {'Content-Type': 'application/json'};
-    const res = http.post(`http://${host}/generate`, example.payload, {
-        headers,
-    });
-
-    check(res, {
-        'Post status is 200': (r) => res.status === 200,
-        'Post response generated tokens': (r) => res.status === 200 && res.json().details.generated_tokens === example.generated_tokens,
-    });
-
-    if (res.status === 200) {
-        totalTime.add(res.headers["X-Total-Time"]);
-        validationTime.add(res.headers["X-Validation-Time"]);
-        queueTime.add(res.headers["X-Queue-Time"]);
-        inferenceTime.add(res.headers["X-Inference-Time"]);
-        timePerToken.add(res.headers["X-Time-Per-Token"]);
-    }
-}
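
---
Note, outside the diff: after `make prepare_orca` runs `python orca.py`, the
script should leave a `./small.json` capped at 2000 entries. A minimal sketch
of a sanity check for that output, assuming the file name and JSON keys from
orca.py above (the snippet itself is not an existing helper in this repo):

    import json

    # Load the file written by orca.py and check its shape.
    with open("./small.json") as f:
        conversations = json.load(f)

    # The script stops at 2000 items and opens every conversation with a human turn.
    assert len(conversations) <= 2000
    first = conversations[0]
    assert first["conversations"][0]["from"] == "human"
    print(first["id"], first["conversations"][0]["value"][:80])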