From b320af6cea13eb6e8ea286228f931e90d04a2d5f Mon Sep 17 00:00:00 2001 From: John Wiggins Date: Wed, 3 Mar 2021 10:14:55 +0100 Subject: [PATCH] Clean up the benchmark suite --- enable/gcbench/bench.py | 61 ++++++++------ enable/gcbench/data.py | 97 +++++++++++++++++++++ enable/gcbench/publish.py | 173 +++++++++++++++++++++----------------- 3 files changed, 228 insertions(+), 103 deletions(-) create mode 100644 enable/gcbench/data.py diff --git a/enable/gcbench/bench.py b/enable/gcbench/bench.py index 1083f7eca..152ba3cd4 100644 --- a/enable/gcbench/bench.py +++ b/enable/gcbench/bench.py @@ -14,10 +14,12 @@ import numpy as np +from enable.gcbench.data import BenchResult, BenchTiming + _MAX_DURATION = 1.0 _SIZE = (512, 512) _BACKENDS = { - "ui": { + "gui": { "kiva.agg": "enable.null.image", "cairo": "enable.null.cairo", "celiagg": "enable.null.celiagg", @@ -37,8 +39,8 @@ def benchmark(outdir=None): """ Benchmark all backends """ suite = gen_suite() + results = {btype: {} for btype in _BACKENDS} - results = {t: {} for t in _BACKENDS} for btype, backends in _BACKENDS.items(): for name, mod_name in backends.items(): print(f"Benchmarking backend: {name}", end="") @@ -48,12 +50,13 @@ def benchmark(outdir=None): print(" ... Not available") continue - # UI backends are checked for performance, File backends are not. - if btype == "ui": + if btype == "gui": + # GUI backends are checked for performance (and features). results[btype][name] = benchmark_backend( suite, name, module, outdir=outdir ) else: + # File backends are checked for feature coverage. # XXX: Use the fact that `name` is the same as the file ext. 
results[btype][name] = exercise_backend( suite, name, module, extension=name, outdir=outdir @@ -70,32 +73,35 @@ def benchmark_backend(suite, mod_name, module, outdir=None): results = {} for name, symbol in suite.items(): + # Result `summary` defaults to "fail" + results[name] = result = BenchResult() + print(f"\n\tBenchmark {name}", end="") try: instance = symbol(gc, module) except Exception: + print(f" ... Failed", end="") continue if name.endswith("2x"): # Double sized with gc: gc.scale_ctm(2, 2) - stats = gen_timings(gc, instance) + timing = gen_timing(gc, instance) else: # Normal scale - stats = gen_timings(gc, instance) + timing = gen_timing(gc, instance) - if stats is None: + if timing is None: print(f" ... Failed", end="") - results[name] = None continue - results[name] = {"times": stats} + result.timing = timing + result.summary = "success" if outdir is not None: fname = os.path.join(outdir, f"{mod_name}.{name}.png") gc.save(fname) - results[name]["format"] = "png" - results[name]["filename"] = os.path.basename(fname) + result.output = os.path.basename(fname) print() # End the line that was left return results @@ -106,11 +112,14 @@ def exercise_backend(suite, mod_name, module, extension, outdir=None): """ GraphicsContext = getattr(module, "GraphicsContext") - results = {name: None for name in suite} + results = {} for name, symbol in suite.items(): + # Result `summary` defaults to "fail" + results[name] = result = BenchResult() + # Skip 2x versions if name.endswith("2x"): - results[name] = {"skip": True} + result.summary = "skip" continue # Use a fresh context each time @@ -120,20 +129,21 @@ def exercise_backend(suite, mod_name, module, extension, outdir=None): try: instance = symbol(gc, module) except Exception: + print(f" ... Failed", end="") continue try: instance() + result.summary = "success" except Exception: print(f" ... 
Failed", end="") continue - results[name] = {"times": {}} if outdir is not None: fname = os.path.join(outdir, f"{mod_name}.{name}.{extension}") gc.save(fname) - results[name]["format"] = extension - results[name]["filename"] = os.path.basename(fname) + # Record the output + result.output = os.path.basename(fname) print() # End the line that was left return results @@ -142,6 +152,7 @@ def exercise_backend(suite, mod_name, module, extension, outdir=None): def gen_suite(): """ Create a suite of benchmarks to run against each backend """ + # Import here so we can use `suite` as a name elsewhere. from enable.gcbench import suite benchmarks = {} @@ -149,12 +160,12 @@ def gen_suite(): symbol = getattr(suite, name) if inspect.isclass(symbol): benchmarks[name] = symbol - benchmarks[f"{name} 2x"] = symbol + benchmarks[f"{name}_2x"] = symbol return benchmarks -def gen_timings(gc, func): +def gen_timing(gc, func): """ Run a function multiple times and generate some stats """ duration = 0.0 @@ -174,10 +185,10 @@ def gen_timings(gc, func): return None times = np.array(times) - return { - "mean": times.mean() * 1000, - "min": times.min() * 1000, - "max": times.max() * 1000, - "std": times.std() * 1000, - "count": len(times), - } + return BenchTiming( + count=len(times), + mean=times.mean() * 1000, + minimum=times.min() * 1000, + maximum=times.max() * 1000, + stddev=times.std() * 1000, + ) diff --git a/enable/gcbench/data.py b/enable/gcbench/data.py new file mode 100644 index 000000000..44cf4a161 --- /dev/null +++ b/enable/gcbench/data.py @@ -0,0 +1,97 @@ +# (C) Copyright 2005-2021 Enthought, Inc., Austin, TX +# All rights reserved. +# +# This software is provided without warranty under the terms of the BSD +# license included in LICENSE.txt and may be redistributed only under +# the conditions described in the aforementioned license. The license +# is also available online at http://www.enthought.com/licenses/BSD.txt +# +# Thanks for using Enthought open source! 
+import os + +from traits.api import ( + Enum, File, Float, HasStrictTraits, Instance, Int, Property, Str +) + + +class BenchResult(HasStrictTraits): + """ The result of a benchmark run on a single backend + """ + #: Short status field for checking the outcome of a benchmark + # Default to "fail"! + summary = Enum("fail", "skip", "success") + + #: A path to an output file and its format + output = File() + output_format = Property(Str(), observe="output") + + #: Timing results + timing = Instance("BenchTiming") + + def _get_output_format(self): + if self.output: + return os.path.splitext(self.output)[-1] + return "" + + def compare_to(self, other): + return BenchComparison.from_pair(self, baseline=other) + + +class BenchComparison(HasStrictTraits): + """ A comparison table entry. + """ + #: CSS class to use for `td` + css_class = Enum("valid", "invalid", "skipped") + + #: The content for the `td` + value = Str() + + @classmethod + def from_pair(cls, result, baseline=None): + """ Create an instance from two BenchResult instances. + """ + if result.summary == "fail": + return cls(value="\N{HEAVY BALLOT X}", css_class="invalid") + + elif result.summary == "skip": + return cls(value="\N{HEAVY MINUS SIGN}", css_class="skipped") + + elif result.summary == "success": + if result.timing is not None: + # Compare timing to the baseline result + relvalue = baseline.timing.mean / result.timing.mean + return cls(value=f"{relvalue:0.2f}", css_class="valid") + else: + # No timing, but the result was successful + return cls(value="\N{HEAVY CHECK MARK}", css_class="valid") + + else: + raise RuntimeError("Unhandled result `summary`") + + return None + + +class BenchTiming(HasStrictTraits): + """ The timing results of a single benchmark. 
+ """ + #: How many times the benchmark ran + count = Int(0) + + #: avg/min/max/std + mean = Float(0.0) + minimum = Float(0.0) + maximum = Float(0.0) + stddev = Float(0.0) + + def to_html(self): + """ Format this instance as an HTML + """ + names = ("mean", "minimum", "maximum", "stddev", "count") + rows = [ + (f"" + f"") + for name in names + ] + + rows = "\n".join(rows) + return f'
{name.capitalize()}{getattr(self, name):0.4f}
{rows}
' diff --git a/enable/gcbench/publish.py b/enable/gcbench/publish.py index 4163489de..9e8e55f3a 100644 --- a/enable/gcbench/publish.py +++ b/enable/gcbench/publish.py @@ -21,11 +21,16 @@

Kiva Backend Benchmark Results

All results are shown relative to the kiva.agg backend. Numbers less than 1.0 indicate a slower result and numbers greater than 1.0 indicate a faster result. +

+For backends that aren't timed:
+"\N{HEAVY CHECK MARK}" indicates a successful run
+"\N{HEAVY BALLOT X}" indicates a failed run
+"\N{HEAVY MINUS SIGN}" indicates a skipped run

{comparison_table} @@ -54,11 +65,15 @@

@@ -70,9 +85,7 @@ """ _TABLE_TEMPLATE = """ - -{headers} - +{headers} {rows}
""" @@ -82,21 +95,30 @@ def publish(results, outdir): """ Write the test results out as a simple webpage. """ backends = [] - functions = {} + benchmarks = {} - # Transpose the results so that they're accesible by function. + # Transpose the results so that they're accessible by benchmark. for btype, backend_results in results.items(): backends.extend(list(backend_results)) for bend in backend_results: - for name, res in backend_results[bend].items(): - functions.setdefault(name, {})[bend] = res + for benchmark_name, res in backend_results[bend].items(): + benchmarks.setdefault(benchmark_name, {})[bend] = res + # Convert each benchmark into an output comparison page and a row for the + # comparison table. comparisons = {} - for name, results in functions.items(): - _build_function_page(name, results, outdir) - # Scale timing values relative to the "kiva.agg" backend implementation - comparisons[name] = _format_benchmark(results, "kiva.agg") - + for benchmark_name, benchmark_results in benchmarks.items(): + _build_output_comparison_page( + benchmark_name, benchmark_results, outdir + ) + # Compare each result to the "kiva.agg" result + baseline = benchmark_results["kiva.agg"] + comparisons[benchmark_name] = { + name: result.compare_to(baseline) + for name, result in benchmark_results.items() + } + + # Fill out the comparison table and write the summary index comparison_table = _build_comparison_table(backends, comparisons) path = os.path.join(outdir, "index.html") with open(path, "w") as fp: @@ -106,48 +128,49 @@ def publish(results, outdir): def _build_comparison_table(backends, comparisons): """ Build some table data for comparison of backend performance timings. 
""" - # All the row data + # Headers + headers = ["Draw Function"] + backends + headers = "\n".join(_th(head) for head in headers) + + # Build the rows rows = [] - for name, stats in comparisons.items(): - # Start the row off with the name of the function - # Link to the table of images created by each backend - link = f'' - row = [f"{link}{name}"] + for benchmark_name, comparisons in comparisons.items(): + # Start the row off with the name of the benchmark + # Link to the benchmark output comparison page + row = [_td(_link(f"{benchmark_name}.html", benchmark_name))] + + # Add column entries for the BenchComparisons, ordered by backend for bend in backends: - # Each backend stat includes a CSS class for table styling - stat, klass = stats[bend] - row.append(f'{stat}') - # Concat all the 's into a single string - rows.append("".join(row)) - # Concat all the 's into a multiline string. - rows = "\n".join(f"{row}" for row in rows) + comp = comparisons[bend] + row.append(f'{comp.value}') - # Headers - headers = ["Draw Function"] + backends - headers = "\n".join(f"{head}" for head in headers) + # Concat all the columns into a single table row + rows.append(_tr("".join(row))) + rows = "\n".join(rows) # Smash it all together in the template return _TABLE_TEMPLATE.format(headers=headers, rows=rows) -def _build_function_page(benchmark_name, results, outdir): +def _build_output_comparison_page(benchmark_name, backend_results, outdir): """ Build a page which shows backend outputs next to each other. 
""" + # Headers + headers = ("Backend", "Output", "Timing") + headers = "".join(_th(name) for name in headers) + # Build the rows - backends = [] - output_tds, stat_tds = "", "" - for backend_name, result in results.items(): - if result is None or "skip" in result: + rows = [] + for backend_name, result in backend_results.items(): + # If no file was output, skip + if not result.output: continue - backends.append(backend_name) - output_tds += f"{_format_output(result)}" - stat_tds += f"{_format_stats(result['times'])}" - - rows = f"{output_tds}\n{stat_tds}" - - # Headers - headers = "\n".join(f"{name}" for name in backends) + # A row is [Backend | Output | Timing] + output = _format_output(result) + timing = _format_timing(result) + rows.append(_tr(f"{_td(backend_name)}{_td(output)}{_td(timing)}")) + rows = "\n".join(rows) table = _TABLE_TEMPLATE.format(headers=headers, rows=rows) content = _IMAGE_PAGE_TEMPLATE.format( @@ -159,46 +182,40 @@ def _build_function_page(benchmark_name, results, outdir): fp.write(content) -def _format_benchmark(results, baseline): - """ Convert stats for backend benchmark runs into data for a table row. - """ - basevalue = results[baseline]["times"]["mean"] - formatted = {} - for name, result in results.items(): - if result is not None: - stats = result.get("times", {}) - if stats: - relvalue = basevalue / stats["mean"] - formatted[name] = (f"{relvalue:0.2f}", "valid") - else: - if "skip" in result: - # Benchmark was skipped - formatted[name] = ("\N{HEAVY MINUS SIGN}", "skipped") - else: - # No times, but the backend succeeded - formatted[name] = ("\N{HEAVY CHECK MARK}", "valid") - else: - formatted[name] = ("\N{HEAVY BALLOT X}", "invalid") - - return formatted - - def _format_output(result): """ Convert the output from a single benchmark run into an image embed or link. 
""" - if result["format"] in ("png", "svg"): - return f'' + if result.output_format in (".png", ".svg"): + return _img(result.output) else: - return f'download' + return _link(result.output, "download") -def _format_stats(stats): +def _format_timing(result): """ Convert timing stats for a single benchmark run into a table. """ - rows = [ - f"{key.capitalize()}{value:0.4f}" - for key, value in stats.items() - ] - rows = "\n".join(rows) - return f"

Timings:

{rows}
" + if result.timing is None: + return "" + return result.timing.to_html() + + +# HTML utils +def _img(src): + return f'' + + +def _link(target, text): + return f'{text}' + + +def _td(data, **attrs): + return f"{data}" + + +def _th(data): + return f"{data}" + + +def _tr(data): + return f"{data}"