mirror of
https://github.com/huggingface/text-generation-inference.git
synced 2025-04-25 01:22:14 +00:00
fix: Compute comparison table
This commit is contained in:
parent
8d358d9c61
commit
4642fd27ad
2
.github/workflows/ci_build.yaml
vendored
2
.github/workflows/ci_build.yaml
vendored
@ -43,5 +43,3 @@ jobs:
|
|||||||
# https://github.com/actions/runner/issues/2206
|
# https://github.com/actions/runner/issues/2206
|
||||||
release-tests: ${{ inputs.release-tests == true }}
|
release-tests: ${{ inputs.release-tests == true }}
|
||||||
secrets: inherit
|
secrets: inherit
|
||||||
load_tests:
|
|
||||||
uses: ./.github/workflows/load_test.yaml
|
|
19
.github/workflows/load_test.yaml
vendored
19
.github/workflows/load_test.yaml
vendored
@ -4,6 +4,7 @@ on:
|
|||||||
schedule:
|
schedule:
|
||||||
- cron: '0 0 * * 1-5'
|
- cron: '0 0 * * 1-5'
|
||||||
workflow_call:
|
workflow_call:
|
||||||
|
workflow_dispatch:
|
||||||
|
|
||||||
pull_request:
|
pull_request:
|
||||||
paths:
|
paths:
|
||||||
@ -15,6 +16,7 @@ env:
|
|||||||
AWS_DEFAULT_REGION: us-east-1
|
AWS_DEFAULT_REGION: us-east-1
|
||||||
AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
|
AWS_ACCESS_KEY_ID: ${{ secrets.S3_AWS_ACCESS_KEY_ID }}
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.S3_AWS_SECRET_ACCESS_KEY }}
|
||||||
|
LOAD_TEST_ISSUE: 2235
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
load-tests:
|
load-tests:
|
||||||
@ -76,7 +78,7 @@ jobs:
|
|||||||
- name: Archive test results artifacts
|
- name: Archive test results artifacts
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: benchmark_results_plots
|
name: benchmark_results
|
||||||
path: |
|
path: |
|
||||||
load_tests/output/*
|
load_tests/output/*
|
||||||
|
|
||||||
@ -88,15 +90,22 @@ jobs:
|
|||||||
if: github.event_name == 'pull_request'
|
if: github.event_name == 'pull_request'
|
||||||
with:
|
with:
|
||||||
script: |
|
script: |
|
||||||
|
let content=require('fs').readFileSync('load_tests/output/benchmark_avg_delta.md', 'utf-8');
|
||||||
github.rest.issues.createComment({
|
github.rest.issues.createComment({
|
||||||
issue_number: context.issue.number,
|
issue_number: process.env.LOAD_TEST_ISSUE,
|
||||||
owner: context.repo.owner,
|
owner: context.repo.owner,
|
||||||
repo: context.repo.repo,
|
repo: context.repo.repo,
|
||||||
body: '🚀 Load test results are in:\n\n'+
|
body: '🚀 Load test results are in for commit [${{ github.sha }}](https://github.com/huggingface/text-generation-inference/commit/${{ github.sha }})\n\n'+
|
||||||
'## Variable length prompts\n'+
|
'## Variable length prompts\n'+
|
||||||
|
'<p float="left">\n'+
|
||||||
'<img src="http://text-generation-inference-ci.s3-website-us-east-1.amazonaws.com/${{github.sha}}/sharegpt_conversations_constant_arrival_rate.png" width=200>\n' +
|
'<img src="http://text-generation-inference-ci.s3-website-us-east-1.amazonaws.com/${{github.sha}}/sharegpt_conversations_constant_arrival_rate.png" width=200>\n' +
|
||||||
'<img src="http://text-generation-inference-ci.s3-website-us-east-1.amazonaws.com/${{github.sha}}/sharegpt_conversations_constant_vus.png" width=200>\n\n' +
|
'<img src="http://text-generation-inference-ci.s3-website-us-east-1.amazonaws.com/${{github.sha}}/sharegpt_conversations_constant_vus.png" width=200>\n' +
|
||||||
|
'</p>\n\n' +
|
||||||
'## Constant length prompts\n'+
|
'## Constant length prompts\n'+
|
||||||
|
'<p float="left">\n'+
|
||||||
'<img src="http://text-generation-inference-ci.s3-website-us-east-1.amazonaws.com/${{github.sha}}/constant_tokens_constant_vus.png" width=200>\n' +
|
'<img src="http://text-generation-inference-ci.s3-website-us-east-1.amazonaws.com/${{github.sha}}/constant_tokens_constant_vus.png" width=200>\n' +
|
||||||
'<img src="http://text-generation-inference-ci.s3-website-us-east-1.amazonaws.com/${{github.sha}}/constant_tokens_constant_arrival_rate.png" width=200>\n'
|
'<img src="http://text-generation-inference-ci.s3-website-us-east-1.amazonaws.com/${{github.sha}}/constant_tokens_constant_arrival_rate.png" width=200>\n'+
|
||||||
|
'</p>\n\n' +
|
||||||
|
'## Delta to last release\n\n'+
|
||||||
|
content
|
||||||
})
|
})
|
||||||
|
@ -41,6 +41,48 @@ def merge_previous_results(csv_path: str, df: pd.DataFrame, version_id: str) ->
|
|||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
def percentage_diff(x):
    """Return the symmetric percentage difference between ``x[0]`` and ``x[1]``.

    The difference is expressed relative to the mean magnitude of the two
    values: ``|x1 - x0| / ((|x0| + |x1|) / 2) * 100``. For non-negative inputs
    this is identical to dividing by the plain mean of the two values.

    Args:
        x: An indexable sequence of numbers. Only the first two elements are
            used; any further elements are ignored.

    Returns:
        The percentage difference as a non-negative float, or ``0.0`` when
        fewer than two values are available or both values are zero.
    """
    # in case we have no value to compare
    if len(x) < 2:
        return 0.0
    first, second = x[0], x[1]
    # Use magnitudes in the denominator so that values of opposite sign do not
    # cancel out (e.g. -1 and 1 differ, yet their signed sum is 0) and so the
    # result can never turn negative.
    magnitude_sum = abs(first) + abs(second)
    if magnitude_sum == 0:
        return 0.0
    return abs(second - first) / (magnitude_sum / 2) * 100
|
||||||
|
|
||||||
|
|
||||||
|
def compute_avg_delta(df: pd.DataFrame, metric: str, test_type: TestType) -> float:
    """Average the per-load-level percentage difference of ``metric``.

    Rows are grouped by the load parameter of the test type (``'vus'`` for
    constant-VUs tests, ``'rate'`` for constant-arrival-rate tests). For each
    group the percentage difference between the two smallest metric values is
    computed with ``percentage_diff``, and the mean over all groups is
    returned.

    Args:
        df: Results frame containing the load parameter column and ``metric``.
        metric: Name of the column to compare.
        test_type: Selects which load parameter column is used for grouping.

    Returns:
        The mean percentage difference, or ``0.0`` for an unsupported test
        type or when there is no row with the load parameter set.
    """
    if test_type == TestType.CONSTANT_VUS:
        param = 'vus'
    elif test_type == TestType.CONSTANT_ARRIVAL_RATE:
        param = 'rate'
    else:
        return 0.0
    rows = df[df[param].notna()]
    if rows.empty:
        # Nothing to compare: averaging an empty result would yield NaN.
        return 0.0
    # NOTE(review): values are sorted by magnitude, so when a group holds more
    # than two rows only the two smallest values are compared -- confirm that
    # callers always pass exactly two versions per load level.
    per_level = rows.groupby(param)[metric].apply(
        lambda x: percentage_diff(sorted(x.values)))
    avg = per_level.mean()
    # Keep the declared ``float`` contract even if every group produced NaN.
    return 0.0 if pd.isna(avg) else float(avg)
|
||||||
|
|
||||||
|
|
||||||
|
def compute_avg_table(df: pd.DataFrame):
    """Write a markdown table of average metric deltas per input/test type.

    Only rows whose ``name`` starts with ``'tgi'`` or ``'v'`` are kept. For
    every combination of input type, test type and tracked metric present in
    ``df``, the average delta is computed with ``compute_avg_delta`` and the
    result is written as a GitHub-flavoured markdown table to
    ``output/benchmark_avg_delta.md`` under the current working directory.

    Args:
        df: Concatenated results frame with at least the ``name`` and
            ``input_type`` columns plus the metric and load parameter columns.
    """
    tracked_metrics = ('inter_token_latency', 'time_to_first_token', 'end_to_end_latency',
                       'tokens_throughput', 'requests_ok', 'error_rate')
    # only keep the current version and semver rows for comparison
    # (na=False: rows without a name are dropped instead of breaking the mask)
    df = df[df['name'].str.startswith(('tgi', 'v'), na=False)]
    # Preserve the frame's column order, as the table rows follow it.
    metrics = [column for column in df.columns if column in tracked_metrics]
    # compute the average delta for each metric and test type; rows are
    # collected in a list so the frame is built once instead of being
    # re-concatenated on every iteration
    rows = []
    for input_type in [ExecutorInputType.SHAREGPT_CONVERSATIONS, ExecutorInputType.CONSTANT_TOKENS]:
        df_avg = df[df['input_type'] == input_type.value]
        for test_type in [TestType.CONSTANT_VUS, TestType.CONSTANT_ARRIVAL_RATE]:
            for metric in metrics:
                rows.append({
                    'metric': metric,
                    'input_type': input_type.value,
                    'test_type': test_type.value,
                    'avg_delta': compute_avg_delta(df_avg, metric, test_type),
                })
    avg_table = pd.DataFrame(rows, columns=['metric', 'input_type', 'test_type', 'avg_delta'])
    # write the result to a markdown formatted table in a file
    output_dir = os.path.join(os.getcwd(), 'output')
    # The directory may not exist yet when this runs before any plot is saved.
    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, 'benchmark_avg_delta.md')
    avg_table.to_markdown(path, index=False, tablefmt='github',
                          headers=['Metric', 'Input Type', 'Test Type', 'Avg Delta (%)'])
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
model = 'Qwen/Qwen2-7B'
|
model = 'Qwen/Qwen2-7B'
|
||||||
runner = TGIDockerRunner(model)
|
runner = TGIDockerRunner(model)
|
||||||
@ -59,6 +101,7 @@ def main():
|
|||||||
runner.stop()
|
runner.stop()
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
|
all_dfs = pd.DataFrame()
|
||||||
for input_type in [ExecutorInputType.SHAREGPT_CONVERSATIONS, ExecutorInputType.CONSTANT_TOKENS]:
|
for input_type in [ExecutorInputType.SHAREGPT_CONVERSATIONS, ExecutorInputType.CONSTANT_TOKENS]:
|
||||||
for test_type in [TestType.CONSTANT_VUS, TestType.CONSTANT_ARRIVAL_RATE]:
|
for test_type in [TestType.CONSTANT_VUS, TestType.CONSTANT_ARRIVAL_RATE]:
|
||||||
directory = os.path.join('results', input_type.value.lower(), test_type.value.lower())
|
directory = os.path.join('results', input_type.value.lower(), test_type.value.lower())
|
||||||
@ -84,12 +127,15 @@ def main():
|
|||||||
if f.endswith(f'{input_type.value.lower()}_{test_type.value.lower()}.csv'):
|
if f.endswith(f'{input_type.value.lower()}_{test_type.value.lower()}.csv'):
|
||||||
csv_path = os.path.join('/tmp/artifacts', d, f)
|
csv_path = os.path.join('/tmp/artifacts', d, f)
|
||||||
# only keep short commit hash
|
# only keep short commit hash
|
||||||
d = d[:7]
|
if len(d) > 7:
|
||||||
|
d = d[:7]
|
||||||
dfs = merge_previous_results(csv_path, dfs, d)
|
dfs = merge_previous_results(csv_path, dfs, d)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f'Error while merging previous results, skipping: {e}')
|
logger.error(f'Error while merging previous results, skipping: {e}')
|
||||||
plot_metrics(f'{model} {get_gpu_names()}', dfs, test_type,
|
plot_metrics(f'{model} {get_gpu_names()}', dfs, test_type,
|
||||||
f'output/{input_type.value.lower()}_{test_type.value.lower()}')
|
f'output/{input_type.value.lower()}_{test_type.value.lower()}')
|
||||||
|
all_dfs = pd.concat([all_dfs, dfs])
|
||||||
|
compute_avg_table(all_dfs)
|
||||||
|
|
||||||
|
|
||||||
def get_gpu_names() -> str:
|
def get_gpu_names() -> str:
|
||||||
|
@ -36,6 +36,7 @@ def parse_json_files(directory: str, test_type: TestType) -> pd.DataFrame:
|
|||||||
'rate': data['k6_config']['rate'],
|
'rate': data['k6_config']['rate'],
|
||||||
'duration': data['k6_config']['duration']
|
'duration': data['k6_config']['duration']
|
||||||
}
|
}
|
||||||
|
entry['input_type'] = data['k6_config']['input_type']
|
||||||
entry['test_duration'] = data['state']['testRunDurationMs'] / 1000.
|
entry['test_duration'] = data['state']['testRunDurationMs'] / 1000.
|
||||||
entry['requests_ok'] = data['root_group']['checks'][0]['passes']
|
entry['requests_ok'] = data['root_group']['checks'][0]['passes']
|
||||||
entry['requests_fail'] = data['root_group']['checks'][0]['fails']
|
entry['requests_fail'] = data['root_group']['checks'][0]['fails']
|
||||||
|
16
load_tests/poetry.lock
generated
16
load_tests/poetry.lock
generated
@ -1297,6 +1297,20 @@ files = [
|
|||||||
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
|
{file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tabulate"
|
||||||
|
version = "0.9.0"
|
||||||
|
description = "Pretty-print tabular data"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
files = [
|
||||||
|
{file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"},
|
||||||
|
{file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
widechars = ["wcwidth"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokenizers"
|
name = "tokenizers"
|
||||||
version = "0.19.1"
|
version = "0.19.1"
|
||||||
@ -1558,4 +1572,4 @@ dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.11"
|
python-versions = "^3.11"
|
||||||
content-hash = "7678b38af0a7b76457fa91758a1b1021404c5981ca1298863dece6158d67a867"
|
content-hash = "e58b659457f8a7dc54ca3f5e7c247351d90f7e741ecdbbf1fa94f4a597da8844"
|
||||||
|
@ -16,6 +16,7 @@ psutil = "^6.0.0"
|
|||||||
jinja2 = "^3.1.4"
|
jinja2 = "^3.1.4"
|
||||||
transformers = "^4.42.3"
|
transformers = "^4.42.3"
|
||||||
gputil = "^1.4.0"
|
gputil = "^1.4.0"
|
||||||
|
tabulate = "^0.9.0"
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
|
Loading…
Reference in New Issue
Block a user