diff --git a/.github/workflows/kani.yml b/.github/workflows/kani.yml
index e40484f852ce..2f19a5fb2c7d 100644
--- a/.github/workflows/kani.yml
+++ b/.github/workflows/kani.yml
@@ -306,4 +306,20 @@ jobs:
       - name: Run benchcomp
         run: |
           new/tools/benchcomp/bin/benchcomp \
-            --config new/tools/benchcomp/configs/perf-regression.yaml
+            --config new/tools/benchcomp/configs/perf-regression.yaml \
+            run
+          new/tools/benchcomp/bin/benchcomp \
+            --config new/tools/benchcomp/configs/perf-regression.yaml \
+            collate
+
+      - name: Perf Regression Results Table
+        run: |
+          new/tools/benchcomp/bin/benchcomp \
+            --config new/tools/benchcomp/configs/perf-regression.yaml \
+            visualize --only dump_markdown_results_table >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Run other visualizations
+        run: |
+          new/tools/benchcomp/bin/benchcomp \
+            --config new/tools/benchcomp/configs/perf-regression.yaml \
+            visualize --except dump_markdown_results_table
diff --git a/tools/benchcomp/benchcomp/visualizers/__init__.py b/tools/benchcomp/benchcomp/visualizers/__init__.py
index 7fa2b3b750ee..c4870c425f19 100644
--- a/tools/benchcomp/benchcomp/visualizers/__init__.py
+++ b/tools/benchcomp/benchcomp/visualizers/__init__.py
@@ -3,7 +3,9 @@
 
 
 import dataclasses
+import textwrap
 
+import jinja2
 import yaml
 
 import benchcomp
@@ -76,3 +78,73 @@ def __call__(self, results):
         with self.get_out_file() as handle:
             print(
                 yaml.dump(results, default_flow_style=False), file=handle)
+
+
+
+class dump_markdown_results_table:
+    """Print a Markdown-formatted table displaying benchmark results
+
+    The 'out_file' key is mandatory; specify '-' to print to stdout.
+
+    Sample configuration:
+
+    visualize:
+    - type: dump_markdown_results_table
+      out_file: '-'
+    """
+
+
+    def __init__(self, out_file):
+        self.get_out_file = benchcomp.Outfile(out_file)
+
+
+    @staticmethod
+    def _get_template():
+        # One '## <metric>' section per metric, each containing a table with
+        # one row per benchmark and one column per variant.
+        return textwrap.dedent("""\
+            {% for metric, benchmarks in d["metrics"].items() %}
+            ## {{ metric }}
+
+            | Benchmark | {% for variant in d["variants"] %} {{ variant }} |{% endfor %}
+            | --- | {% for variant in d["variants"] %}--- |{% endfor -%}
+            {% for bench_name, bench_variants in benchmarks.items() %}
+            | {{ bench_name }} {% for variant in d["variants"] -%}
+            | {{ bench_variants[variant] }} {% endfor %}|
+            {%- endfor %}
+            {% endfor -%}
+            """)
+
+
+    @staticmethod
+    def _get_variant_names(results):
+        # dict.values() is a view and is not subscriptable in Python 3, so
+        # materialize it before indexing.
+        return list(results["benchmarks"].values())[0]["variants"]
+
+
+    @staticmethod
+    def _organize_results_into_metrics(results):
+        # Pivot {benchmark: {variant: {metric: value}}} into
+        # {metric: {benchmark: {variant: value}}} for per-metric tables.
+        ret = {metric: {} for metric in results["metrics"]}
+        for bench, bench_result in results["benchmarks"].items():
+            for variant, variant_result in bench_result["variants"].items():
+                for metric, value in variant_result["metrics"].items():
+                    try:
+                        ret[metric][bench][variant] = value
+                    except KeyError:
+                        ret[metric][bench] = {
+                            variant: value
+                        }
+        return ret
+
+
+    def __call__(self, results):
+        data = {
+            "metrics": self._organize_results_into_metrics(results),
+            "variants": self._get_variant_names(results),
+        }
+
+        # enabled_extensions must be an iterable of extensions; a bare
+        # string would be iterated character-by-character.
+        env = jinja2.Environment(
+            loader=jinja2.BaseLoader, autoescape=jinja2.select_autoescape(
+                enabled_extensions=("html",),
+                default_for_string=True))
+        template = env.from_string(self._get_template())
+        output = template.render(d=data)[:-1]
+        with self.get_out_file() as handle:
+            print(output, file=handle)
diff --git a/tools/benchcomp/configs/perf-regression.yaml b/tools/benchcomp/configs/perf-regression.yaml
index cb9d0c493fb4..a76cb5ab54bf 100644
--- a/tools/benchcomp/configs/perf-regression.yaml
+++ b/tools/benchcomp/configs/perf-regression.yaml
@@ -29,7 +29,10 @@ run:
 
 visualize:
 - type: dump_yaml
-  out_file: '/tmp/result.yaml'
+  out_file: '-'
+
+- type: dump_markdown_results_table
+  out_file: '-'
 
 - type: error_on_regression
   variant_pairs: [[kani_old, kani_new]]
diff --git a/tools/benchcomp/test/test_regression.py b/tools/benchcomp/test/test_regression.py
index 3e6afb832662..258005f84b57 100644
--- a/tools/benchcomp/test/test_regression.py
+++ b/tools/benchcomp/test/test_regression.py
@@ -8,6 +8,7 @@
 import pathlib
 import subprocess
 import tempfile
+import textwrap
 import unittest
 
 import yaml
@@ -391,6 +392,69 @@ def test_error_on_regression_visualization_ratio_regressed(self):
             run_bc.proc.returncode, 1, msg=run_bc.stderr)
 
+    def test_markdown_results_table(self):
+        """Run the markdown results table visualization"""
+
+        with tempfile.TemporaryDirectory() as tmp:
+            run_bc = Benchcomp({
+                "variants": {
+                    "variant_1": {
+                        "config": {
+                            "directory": str(tmp),
+                            "command_line":
+                                "mkdir bench_1 bench_2"
+                                "&& echo true > bench_1/success"
+                                "&& echo true > bench_2/success"
+                                "&& echo 5 > bench_1/runtime"
+                                "&& echo 10 > bench_2/runtime"
+                        },
+                    },
+                    "variant_2": {
+                        "config": {
+                            "directory": str(tmp),
+                            "command_line":
+                                "mkdir bench_1 bench_2"
+                                "&& echo true > bench_1/success"
+                                "&& echo false > bench_2/success"
+                                "&& echo 10 > bench_1/runtime"
+                                "&& echo 5 > bench_2/runtime"
+                        }
+                    }
+                },
+                "run": {
+                    "suites": {
+                        "suite_1": {
+                            "parser": { "module": "test_file_to_metric" },
+                            "variants": ["variant_1", "variant_2"]
+                        }
+                    }
+                },
+                "visualize": [{
+                    "type": "dump_markdown_results_table",
+                    "out_file": "-",
+                }]
+            })
+            run_bc()
+
+            self.assertEqual(run_bc.proc.returncode, 0, msg=run_bc.stderr)
+            self.assertEqual(
+                run_bc.stdout, textwrap.dedent("""
+                    ## runtime
+
+                    | Benchmark | variant_1 | variant_2 |
+                    | --- | --- |--- |
+                    | bench_1 | 5 | 10 |
+                    | bench_2 | 10 | 5 |
+
+                    ## success
+
+                    | Benchmark | variant_1 | variant_2 |
+                    | --- | --- |--- |
+                    | bench_1 | True | True |
+                    | bench_2 | True | False |
+                    """))
+
+
     def test_only_dump_yaml(self):
         """Ensure that benchcomp terminates with return code 0 when
         `--only dump_yaml` is passed, even if the error_on_regression
         visualization would have resulted in a return code of 1"""