mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-09-11 20:34:54 +00:00
Adding orca script.
This commit is contained in:
parent
ab156adc0f
commit
affef3276e
@ -2,5 +2,8 @@
|
|||||||
# Download the raw ShareGPT dump used by the load tests.
ShareGPT_V3_unfiltered_cleaned_split.json:
	wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

# Filter the ShareGPT dump into the benchmark subset (see filter.py).
prepare_share: ShareGPT_V3_unfiltered_cleaned_split.json
	python filter.py

# Build a small OpenOrca-based dataset (orca.py writes ./small.json).
prepare_orca:
	python orca.py
||||||
|
27
load_tests/orca.py
Normal file
27
load_tests/orca.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import json
|
||||||
|
import datasets
|
||||||
|
import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Export the first 2k OpenOrca prompts to ./small.json.

    Each record is rewritten into ShareGPT-style format:
    ``{"conversations": [{"from": "human", "value": <question>}], "id": <id>}``
    so the load tests can consume OpenOrca the same way they consume ShareGPT.
    """
    dataset = datasets.load_dataset("Open-Orca/OpenOrca", split="train")
    # Select only the first 2k conversations that start with a human.
    # NOTE: renamed from `max`, which shadowed the builtin max().
    limit = min(2000, len(dataset))
    conversations = []
    for item in tqdm.tqdm(dataset, total=limit):
        conversations.append(
            {
                "conversations": [
                    {"from": "human", "value": item["question"]},
                ],
                "id": item["id"],
            }
        )
        if len(conversations) >= limit:
            break

    # json.dump returns None, so the previous `data = json.dump(...)`
    # assignment was dead code; it only writes to the file handle.
    with open("./small.json", "w") as f:
        json.dump(conversations, f, indent=4)


if __name__ == "__main__":
    main()
|
@ -1,63 +0,0 @@
|
|||||||
import {check} from 'k6';
import http from 'k6/http';
import {Trend} from 'k6/metrics';

// Target text-generation-inference server; override with HOST env var.
const host = __ENV.HOST || '127.0.0.1:3000';

// Custom trends mirroring the server's timing response headers.
const totalTime = new Trend('total_time', true);
const validationTime = new Trend('validation_time', true);
const queueTime = new Trend('queue_time', true);
const inferenceTime = new Trend('inference_time', true);
const timePerToken = new Trend('time_per_token', true);

// Fixed request used for every iteration; seed pinned for deterministic
// generation so generated_tokens can be asserted exactly.
const example = {
    payload: JSON.stringify({
        inputs: '# This is a fibonacci function written in the Python programming language.' +
            'def fibonacci',
        parameters: {
            details: true,
            max_new_tokens: 60,
            temperature: 0.2,
            top_p: 0.95,
            seed: 0,
        },
    }),
    generated_tokens: 60,
};

export const options = {
    thresholds: {
        // Any failed request fails the run; latency targets are p95 bounds.
        http_req_failed: ['rate==0'],
        time_per_token: ['p(95)<90'],
        queue_time: ['p(95)<1500'],
    },
    scenarios: {
        load_test: {
            executor: 'constant-arrival-rate',
            duration: '60s',
            preAllocatedVUs: 100,
            rate: 10,
            timeUnit: '1s',
        },
    },
};

export default function () {
    const headers = {'Content-Type': 'application/json'};
    const res = http.post(`http://${host}/generate`, example.payload, {
        headers,
    });

    // Fix: the original callbacks declared `r` but closed over `res`,
    // ignoring their own parameter. `r` is the same response object.
    check(res, {
        'Post status is 200': (r) => r.status === 200,
        'Post response generated tokens': (r) =>
            r.status === 200 && r.json().details.generated_tokens === example.generated_tokens,
    });

    if (res.status === 200) {
        // Record server-reported timings (milliseconds) for the trends above.
        totalTime.add(res.headers['X-Total-Time']);
        validationTime.add(res.headers['X-Validation-Time']);
        queueTime.add(res.headers['X-Queue-Time']);
        inferenceTime.add(res.headers['X-Inference-Time']);
        timePerToken.add(res.headers['X-Time-Per-Token']);
    }
}
|
|
Loading…
Reference in New Issue
Block a user