Mirror of https://github.com/huggingface/text-generation-inference.git (synced 2025-09-11 20:34:54 +00:00)
Adding orca script.

commit affef3276e (parent ab156adc0f)
@@ -2,5 +2,8 @@
 ShareGPT_V3_unfiltered_cleaned_split.json:
 	wget https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

-prepare_data: ShareGPT_V3_unfiltered_cleaned_split.json
+prepare_share: ShareGPT_V3_unfiltered_cleaned_split.json
 	python filter.py
+
+prepare_orca:
+	python orca.py
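This hunk renames the prepare_data target to prepare_share and adds a prepare_orca target that invokes the orca.py script added below, so running make prepare_orca regenerates the OpenOrca-based test file (small.json) in one step.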
load_tests/orca.py (new file, +27 lines)

@@ -0,0 +1,27 @@
+import json
+import datasets
+import tqdm
+
+
+def main():
+    dataset = datasets.load_dataset("Open-Orca/OpenOrca", split="train")
+    # Select only the first 2k conversations that start with a human.
+    limit = min(2000, len(dataset))
+    conversations = []
+    for item in tqdm.tqdm(dataset, total=limit):
+        conversation = {
+            "conversations": [
+                {"from": "human", "value": item["question"]},
+            ],
+            "id": item["id"],
+        }
+        conversations.append(conversation)
+        if len(conversations) >= limit:
+            break
+
+    with open("./small.json", "w") as f:
+        json.dump(conversations, f, indent=4)
+
+
+if __name__ == "__main__":
+    main()
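A minimal sanity check for the file this script writes (a sketch, not part of the commit; it assumes orca.py has already been run so that ./small.json exists):

    import json

    # Load the file produced by orca.py and check its shape.
    with open("./small.json") as f:
        conversations = json.load(f)

    # orca.py caps the output at 2000 records.
    assert len(conversations) <= 2000
    for record in conversations:
        # Each record carries an id and a single human turn.
        assert "id" in record
        assert record["conversations"][0]["from"] == "human"
    print(f"{len(conversations)} conversations loaded")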
Deleted k6 load-test script (63 lines)

@@ -1,63 +0,0 @@
-import {check} from 'k6';
-import http from 'k6/http';
-import {Trend} from 'k6/metrics';
-
-const host = __ENV.HOST || '127.0.0.1:3000';
-
-const totalTime = new Trend('total_time', true);
-const validationTime = new Trend('validation_time', true);
-const queueTime = new Trend('queue_time', true);
-const inferenceTime = new Trend('inference_time', true);
-const timePerToken = new Trend('time_per_token', true);
-
-const example = {
-    payload: JSON.stringify({
-        inputs: '# This is a fibonacci function written in the Python programming language.' +
-            'def fibonacci',
-        parameters: {
-            details: true,
-            max_new_tokens: 60,
-            temperature: 0.2,
-            top_p: 0.95,
-            seed: 0,
-        },
-    }),
-    generated_tokens: 60
-};
-
-export const options = {
-    thresholds: {
-        http_req_failed: ['rate==0'],
-        time_per_token: ['p(95)<90'],
-        queue_time: ['p(95)<1500'],
-    },
-    scenarios: {
-        load_test: {
-            executor: 'constant-arrival-rate',
-            duration: '60s',
-            preAllocatedVUs: 100,
-            rate: 10,
-            timeUnit: '1s',
-        },
-    },
-};
-
-export default function () {
-    const headers = {'Content-Type': 'application/json'};
-    const res = http.post(`http://${host}/generate`, example.payload, {
-        headers,
-    });
-
-    check(res, {
-        'Post status is 200': (r) => res.status === 200,
-        'Post response generated tokens': (r) => res.status === 200 && res.json().details.generated_tokens === example.generated_tokens,
-    });
-
-    if (res.status === 200) {
-        totalTime.add(res.headers["X-Total-Time"]);
-        validationTime.add(res.headers["X-Validation-Time"]);
-        queueTime.add(res.headers["X-Queue-Time"]);
-        inferenceTime.add(res.headers["X-Inference-Time"]);
-        timePerToken.add(res.headers["X-Time-Per-Token"]);
-    }
-}
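For comparison, a rough single-request equivalent of the deleted load test in Python (a sketch using the requests library; the endpoint, payload, and response headers are taken from the k6 script above, and a text-generation-inference server listening on 127.0.0.1:3000 is assumed):

    import requests

    # Same payload shape the k6 script posted to /generate.
    payload = {
        # Prompt concatenated exactly as in the k6 script.
        "inputs": "# This is a fibonacci function written in the Python programming language."
                  "def fibonacci",
        "parameters": {
            "details": True,
            "max_new_tokens": 60,
            "temperature": 0.2,
            "top_p": 0.95,
            "seed": 0,
        },
    }

    res = requests.post("http://127.0.0.1:3000/generate", json=payload)
    assert res.status_code == 200
    # The k6 check asserted that the server reports exactly 60 generated tokens.
    assert res.json()["details"]["generated_tokens"] == 60

    # The k6 Trends tracked these timing headers.
    for header in ("X-Total-Time", "X-Validation-Time", "X-Queue-Time",
                   "X-Inference-Time", "X-Time-Per-Token"):
        print(header, res.headers.get(header))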