From b320af6cea13eb6e8ea286228f931e90d04a2d5f Mon Sep 17 00:00:00 2001
From: John Wiggins
Date: Wed, 3 Mar 2021 10:14:55 +0100
Subject: [PATCH] Clean up the benchmark suite
---
enable/gcbench/bench.py | 61 ++++++++------
enable/gcbench/data.py | 97 +++++++++++++++++++++
enable/gcbench/publish.py | 173 +++++++++++++++++++++-----------------
3 files changed, 228 insertions(+), 103 deletions(-)
create mode 100644 enable/gcbench/data.py
diff --git a/enable/gcbench/bench.py b/enable/gcbench/bench.py
index 1083f7eca..152ba3cd4 100644
--- a/enable/gcbench/bench.py
+++ b/enable/gcbench/bench.py
@@ -14,10 +14,12 @@
import numpy as np
+from enable.gcbench.data import BenchResult, BenchTiming
+
_MAX_DURATION = 1.0
_SIZE = (512, 512)
_BACKENDS = {
- "ui": {
+ "gui": {
"kiva.agg": "enable.null.image",
"cairo": "enable.null.cairo",
"celiagg": "enable.null.celiagg",
@@ -37,8 +39,8 @@ def benchmark(outdir=None):
""" Benchmark all backends
"""
suite = gen_suite()
+ results = {btype: {} for btype in _BACKENDS}
- results = {t: {} for t in _BACKENDS}
for btype, backends in _BACKENDS.items():
for name, mod_name in backends.items():
print(f"Benchmarking backend: {name}", end="")
@@ -48,12 +50,13 @@ def benchmark(outdir=None):
print(" ... Not available")
continue
- # UI backends are checked for performance, File backends are not.
- if btype == "ui":
+ if btype == "gui":
+ # GUI backends are checked for performance (and features).
results[btype][name] = benchmark_backend(
suite, name, module, outdir=outdir
)
else:
+ # File backends are checked for feature coverage.
# XXX: Use the fact that `name` is the same as the file ext.
results[btype][name] = exercise_backend(
suite, name, module, extension=name, outdir=outdir
@@ -70,32 +73,35 @@ def benchmark_backend(suite, mod_name, module, outdir=None):
results = {}
for name, symbol in suite.items():
+ # Result `summary` defaults to "fail"
+ results[name] = result = BenchResult()
+
print(f"\n\tBenchmark {name}", end="")
try:
instance = symbol(gc, module)
except Exception:
+ print(f" ... Failed", end="")
continue
if name.endswith("2x"):
# Double sized
with gc:
gc.scale_ctm(2, 2)
- stats = gen_timings(gc, instance)
+ timing = gen_timing(gc, instance)
else:
# Normal scale
- stats = gen_timings(gc, instance)
+ timing = gen_timing(gc, instance)
- if stats is None:
+ if timing is None:
print(f" ... Failed", end="")
- results[name] = None
continue
- results[name] = {"times": stats}
+ result.timing = timing
+ result.summary = "success"
if outdir is not None:
fname = os.path.join(outdir, f"{mod_name}.{name}.png")
gc.save(fname)
- results[name]["format"] = "png"
- results[name]["filename"] = os.path.basename(fname)
+ result.output = os.path.basename(fname)
print() # End the line that was left
return results
@@ -106,11 +112,14 @@ def exercise_backend(suite, mod_name, module, extension, outdir=None):
"""
GraphicsContext = getattr(module, "GraphicsContext")
- results = {name: None for name in suite}
+ results = {}
for name, symbol in suite.items():
+ # Result `summary` defaults to "fail"
+ results[name] = result = BenchResult()
+
# Skip 2x versions
if name.endswith("2x"):
- results[name] = {"skip": True}
+ result.summary = "skip"
continue
# Use a fresh context each time
@@ -120,20 +129,21 @@ def exercise_backend(suite, mod_name, module, extension, outdir=None):
try:
instance = symbol(gc, module)
except Exception:
+ print(f" ... Failed", end="")
continue
try:
instance()
+ result.summary = "success"
except Exception:
print(f" ... Failed", end="")
continue
- results[name] = {"times": {}}
if outdir is not None:
fname = os.path.join(outdir, f"{mod_name}.{name}.{extension}")
gc.save(fname)
- results[name]["format"] = extension
- results[name]["filename"] = os.path.basename(fname)
+ # Record the output
+ result.output = os.path.basename(fname)
print() # End the line that was left
return results
@@ -142,6 +152,7 @@ def exercise_backend(suite, mod_name, module, extension, outdir=None):
def gen_suite():
""" Create a suite of benchmarks to run against each backend
"""
+ # Import here so we can use `suite` as a name elsewhere.
from enable.gcbench import suite
benchmarks = {}
@@ -149,12 +160,12 @@ def gen_suite():
symbol = getattr(suite, name)
if inspect.isclass(symbol):
benchmarks[name] = symbol
- benchmarks[f"{name} 2x"] = symbol
+ benchmarks[f"{name}_2x"] = symbol
return benchmarks
-def gen_timings(gc, func):
+def gen_timing(gc, func):
""" Run a function multiple times and generate some stats
"""
duration = 0.0
@@ -174,10 +185,10 @@ def gen_timings(gc, func):
return None
times = np.array(times)
- return {
- "mean": times.mean() * 1000,
- "min": times.min() * 1000,
- "max": times.max() * 1000,
- "std": times.std() * 1000,
- "count": len(times),
- }
+ return BenchTiming(
+ count=len(times),
+ mean=times.mean() * 1000,
+ minimum=times.min() * 1000,
+ maximum=times.max() * 1000,
+ stddev=times.std() * 1000,
+ )
diff --git a/enable/gcbench/data.py b/enable/gcbench/data.py
new file mode 100644
index 000000000..44cf4a161
--- /dev/null
+++ b/enable/gcbench/data.py
@@ -0,0 +1,97 @@
+# (C) Copyright 2005-2021 Enthought, Inc., Austin, TX
+# All rights reserved.
+#
+# This software is provided without warranty under the terms of the BSD
+# license included in LICENSE.txt and may be redistributed only under
+# the conditions described in the aforementioned license. The license
+# is also available online at http://www.enthought.com/licenses/BSD.txt
+#
+# Thanks for using Enthought open source!
+import os
+
+from traits.api import (
+ Enum, File, Float, HasStrictTraits, Instance, Int, Property, Str
+)
+
+
+class BenchResult(HasStrictTraits):
+ """ The result of a benchmark run on a single backend
+ """
+ #: Short status field for checking the outcome of a benchmark
+ # Default to "fail"!
+ summary = Enum("fail", "skip", "success")
+
+ #: A path to an output file and its format
+ output = File()
+ output_format = Property(Str(), observe="output")
+
+ #: Timing results
+ timing = Instance("BenchTiming")
+
+ def _get_output_format(self):
+ if self.output:
+ return os.path.splitext(self.output)[-1]
+ return ""
+
+ def compare_to(self, other):
+ return BenchComparison.from_pair(self, baseline=other)
+
+
+class BenchComparison(HasStrictTraits):
+ """ A comparison table entry.
+ """
+ #: CSS class to use for `td`
+ css_class = Enum("valid", "invalid", "skipped")
+
+ #: The content for the `td`
+ value = Str()
+
+ @classmethod
+ def from_pair(cls, result, baseline=None):
+ """ Create an instance from two BenchResult instances.
+ """
+ if result.summary == "fail":
+ return cls(value="\N{HEAVY BALLOT X}", css_class="invalid")
+
+ elif result.summary == "skip":
+ return cls(value="\N{HEAVY MINUS SIGN}", css_class="skipped")
+
+ elif result.summary == "success":
+ if result.timing is not None:
+ # Compare timing to the baseline result
+ relvalue = baseline.timing.mean / result.timing.mean
+ return cls(value=f"{relvalue:0.2f}", css_class="valid")
+ else:
+ # No timing, but the result was successful
+ return cls(value="\N{HEAVY CHECK MARK}", css_class="valid")
+
+ else:
+ raise RuntimeError("Unhandled result `summary`")
+
+ return None
+
+
+class BenchTiming(HasStrictTraits):
+ """ The timing results of a single benchmark.
+ """
+ #: How many times the benchmark ran
+ count = Int(0)
+
+ #: avg/min/max/std
+ mean = Float(0.0)
+ minimum = Float(0.0)
+ maximum = Float(0.0)
+ stddev = Float(0.0)
+
+ def to_html(self):
+ """ Format this instance as an HTML table.
+ """
+ names = ("mean", "minimum", "maximum", "stddev", "count")
+ rows = [
+ (f"| {name.capitalize()} | "
+ f"{getattr(self, name):0.4f} |
")
+ for name in names
+ ]
+
+ rows = "\n".join(rows)
+ return f''
diff --git a/enable/gcbench/publish.py b/enable/gcbench/publish.py
index 4163489de..9e8e55f3a 100644
--- a/enable/gcbench/publish.py
+++ b/enable/gcbench/publish.py
@@ -21,11 +21,16 @@
Kiva Backend Benchmark Results
All results are shown relative to the kiva.agg backend. Numbers less than 1.0
indicate a slower result and numbers greater than 1.0 indicate a faster result.
+
+For backends that aren't timed:
+"\N{HEAVY CHECK MARK}" indicates a successful run
+"\N{HEAVY BALLOT X}" indicates a failed run
+"\N{HEAVY MINUS SIGN}" indicates a skipped run
{comparison_table}
@@ -70,9 +85,7 @@
"""
_TABLE_TEMPLATE = """
-
-{headers}
-
+{headers}
{rows}
"""
@@ -82,21 +95,30 @@ def publish(results, outdir):
""" Write the test results out as a simple webpage.
"""
backends = []
- functions = {}
+ benchmarks = {}
- # Transpose the results so that they're accesible by function.
+ # Transpose the results so that they're accessible by benchmark.
for btype, backend_results in results.items():
backends.extend(list(backend_results))
for bend in backend_results:
- for name, res in backend_results[bend].items():
- functions.setdefault(name, {})[bend] = res
+ for benchmark_name, res in backend_results[bend].items():
+ benchmarks.setdefault(benchmark_name, {})[bend] = res
+ # Convert each benchmark into an output comparison page and a row for the
+ # comparison table.
comparisons = {}
- for name, results in functions.items():
- _build_function_page(name, results, outdir)
- # Scale timing values relative to the "kiva.agg" backend implementation
- comparisons[name] = _format_benchmark(results, "kiva.agg")
-
+ for benchmark_name, benchmark_results in benchmarks.items():
+ _build_output_comparison_page(
+ benchmark_name, benchmark_results, outdir
+ )
+ # Compare each result to the "kiva.agg" result
+ baseline = benchmark_results["kiva.agg"]
+ comparisons[benchmark_name] = {
+ name: result.compare_to(baseline)
+ for name, result in benchmark_results.items()
+ }
+
+ # Fill out the comparison table and write the summary index
comparison_table = _build_comparison_table(backends, comparisons)
path = os.path.join(outdir, "index.html")
with open(path, "w") as fp:
@@ -106,48 +128,49 @@ def publish(results, outdir):
def _build_comparison_table(backends, comparisons):
""" Build some table data for comparison of backend performance timings.
"""
- # All the row data
+ # Headers
+ headers = ["Draw Function"] + backends
+ headers = "\n".join(_th(head) for head in headers)
+
+ # Build the rows
rows = []
- for name, stats in comparisons.items():
- # Start the row off with the name of the function
- # Link to the table of images created by each backend
- link = f''
- row = [f"{link}{name} | "]
+ for benchmark_name, comparisons in comparisons.items():
+ # Start the row off with the name of the benchmark
+ # Link to the benchmark output comparison page
+ row = [_td(_link(f"{benchmark_name}.html", benchmark_name))]
+
+ # Add column entries for the BenchComparisons, ordered by backend
for bend in backends:
- # Each backend stat includes a CSS class for table styling
- stat, klass = stats[bend]
- row.append(f'{stat} | ')
- # Concat all the 's into a single string
- rows.append("".join(row))
- # Concat all the | 's into a multiline string.
- rows = "\n".join(f"
{row}
" for row in rows)
+ comp = comparisons[bend]
+ row.append(f'{comp.value} | ')
- # Headers
- headers = ["Draw Function"] + backends
- headers = "\n".join(f"{head} | " for head in headers)
+ # Concat all the columns into a single table row
+ rows.append(_tr("".join(row)))
+ rows = "\n".join(rows)
# Smash it all together in the template
return _TABLE_TEMPLATE.format(headers=headers, rows=rows)
-def _build_function_page(benchmark_name, results, outdir):
+def _build_output_comparison_page(benchmark_name, backend_results, outdir):
""" Build a page which shows backend outputs next to each other.
"""
+ # Headers
+ headers = ("Backend", "Output", "Timing")
+ headers = "".join(_th(name) for name in headers)
+
# Build the rows
- backends = []
- output_tds, stat_tds = "", ""
- for backend_name, result in results.items():
- if result is None or "skip" in result:
+ rows = []
+ for backend_name, result in backend_results.items():
+ # If no file was output, skip
+ if not result.output:
continue
- backends.append(backend_name)
- output_tds += f"{_format_output(result)} | "
- stat_tds += f"{_format_stats(result['times'])} | "
-
- rows = f"{output_tds}
\n{stat_tds}
"
-
- # Headers
- headers = "\n".join(f"{name} | " for name in backends)
+ # A row is [Backend | Output | Timing]
+ output = _format_output(result)
+ timing = _format_timing(result)
+ rows.append(_tr(f"{_td(backend_name)}{_td(output)}{_td(timing)}"))
+ rows = "\n".join(rows)
table = _TABLE_TEMPLATE.format(headers=headers, rows=rows)
content = _IMAGE_PAGE_TEMPLATE.format(
@@ -159,46 +182,40 @@ def _build_function_page(benchmark_name, results, outdir):
fp.write(content)
-def _format_benchmark(results, baseline):
- """ Convert stats for backend benchmark runs into data for a table row.
- """
- basevalue = results[baseline]["times"]["mean"]
- formatted = {}
- for name, result in results.items():
- if result is not None:
- stats = result.get("times", {})
- if stats:
- relvalue = basevalue / stats["mean"]
- formatted[name] = (f"{relvalue:0.2f}", "valid")
- else:
- if "skip" in result:
- # Benchmark was skipped
- formatted[name] = ("\N{HEAVY MINUS SIGN}", "skipped")
- else:
- # No times, but the backend succeeded
- formatted[name] = ("\N{HEAVY CHECK MARK}", "valid")
- else:
- formatted[name] = ("\N{HEAVY BALLOT X}", "invalid")
-
- return formatted
-
-
def _format_output(result):
""" Convert the output from a single benchmark run into an image embed or
link.
"""
- if result["format"] in ("png", "svg"):
- return f'
'
+ if result.output_format in (".png", ".svg"):
+ return _img(result.output)
else:
- return f'download'
+ return _link(result.output, "download")
-def _format_stats(stats):
+def _format_timing(result):
""" Convert timing stats for a single benchmark run into a table.
"""
- rows = [
- f"| {key.capitalize()} | {value:0.4f} |
"
- for key, value in stats.items()
- ]
- rows = "\n".join(rows)
- return f"Timings:
"
+ if result.timing is None:
+ return ""
+ return result.timing.to_html()
+
+
+# HTML utils
+def _img(src):
+ return f'
'
+
+
+def _link(target, text):
+ return f'{text}'
+
+
+def _td(data, **attrs):
+ return f"{data} | "
+
+
+def _th(data):
+ return f"{data} | "
+
+
+def _tr(data):
+ return f"{data}
"