diff --git a/dev/archery/archery/benchmark/compare.py b/dev/archery/archery/benchmark/compare.py
index bf9811f2c8d..8bacac02420 100644
--- a/dev/archery/archery/benchmark/compare.py
+++ b/dev/archery/archery/benchmark/compare.py
@@ -105,18 +105,17 @@ def __init__(self, contender, baseline, threshold=DEFAULT_THRESHOLD):
         self.baseline = baseline
         self.threshold = threshold
 
-    def comparisons(self, suite_filter=None, benchmark_filter=None):
-        """
-        """
-        contender = self.contender.suites(suite_filter, benchmark_filter)
-        baseline = self.baseline.suites(suite_filter, benchmark_filter)
+    @property
+    def comparisons(self):
+        contender = self.contender.suites
+        baseline = self.baseline.suites
         suites = pairwise_compare(contender, baseline)
 
         for suite_name, (suite_cont, suite_base) in suites:
             benchmarks = pairwise_compare(
                 suite_cont.benchmarks, suite_base.benchmarks)
 
-            for bench_name, (bench_cont, bench_base) in benchmarks:
+            for _, (bench_cont, bench_base) in benchmarks:
                 yield BenchmarkComparator(bench_cont, bench_base,
                                           threshold=self.threshold,
                                           suite_name=suite_name)
diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py
index d6efb77dc22..bd2793eb4e8 100644
--- a/dev/archery/archery/benchmark/google.py
+++ b/dev/archery/archery/benchmark/google.py
@@ -18,6 +18,7 @@ from itertools import filterfalse, groupby, tee
 import json
 import subprocess
+from tempfile import NamedTemporaryFile
 
 from .core import Benchmark
 from ..utils.command import Command
@@ -49,13 +50,16 @@ def list_benchmarks(self):
         return str.splitlines(result.stdout.decode("utf-8"))
 
     def results(self):
-        argv = ["--benchmark_format=json", "--benchmark_repetitions=20"]
+        with NamedTemporaryFile() as out:
+            argv = ["--benchmark_repetitions=20",
+                    f"--benchmark_out={out.name}",
+                    "--benchmark_out_format=json"]
 
-        if self.benchmark_filter:
-            argv.append(f"--benchmark_filter={self.benchmark_filter}")
+            if self.benchmark_filter:
+                argv.append(f"--benchmark_filter={self.benchmark_filter}")
 
-        return json.loads(self.run(*argv, stdout=subprocess.PIPE,
-                                   stderr=subprocess.PIPE).stdout)
+            self.run(*argv, check=True)
+            return json.load(out)
 
 
 class GoogleBenchmarkObservation:
diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py
index 7dc56bdf096..dbbb3f5713c 100644
--- a/dev/archery/archery/benchmark/runner.py
+++ b/dev/archery/archery/benchmark/runner.py
@@ -16,6 +16,7 @@
 # under the License.
 
 import glob
+import json
 import os
 import re
@@ -34,14 +35,94 @@ def regex_filter(re_expr):
 
 
 class BenchmarkRunner:
-    def suites(self, suite_filter=None, benchmark_filter=None):
+    def __init__(self, suite_filter=None, benchmark_filter=None):
+        self.suite_filter = suite_filter
+        self.benchmark_filter = benchmark_filter
+
+    @property
+    def suites(self):
         raise NotImplementedError("BenchmarkRunner must implement suites")
 
+    @staticmethod
+    def from_rev_or_path(src, root, rev_or_path, cmake_conf, **kwargs):
+        """ Returns a BenchmarkRunner from a path or a git revision.
+
+        First, it checks if `rev_or_path` is a valid path (or string) of a
+        json object that deserializes to a BenchmarkRunner. If so, it
+        initializes a StaticBenchmarkRunner from it. This allows memoizing
+        the result of a run in a file or a string.
+
+        Second, it checks if `rev_or_path` points to a valid CMake build
+        directory. If so, it creates a CppBenchmarkRunner with this existing
+        CMakeBuild.
+
+        Otherwise, it assumes `rev_or_path` is a revision; it clones/checks
+        out the given revision and creates a fresh CMakeBuild.
+ """ + build = None + if StaticBenchmarkRunner.is_json_result(rev_or_path): + return StaticBenchmarkRunner.from_json(rev_or_path, **kwargs) + elif CMakeBuild.is_build_dir(rev_or_path): + build = CMakeBuild.from_path(rev_or_path) + return CppBenchmarkRunner(build, **kwargs) + else: + root_rev = os.path.join(root, rev_or_path) + os.mkdir(root_rev) + + clone_dir = os.path.join(root_rev, "arrow") + # Possibly checkout the sources at given revision, no need to + # perform cleanup on cloned repository as root_rev is reclaimed. + src_rev, _ = src.at_revision(rev_or_path, clone_dir) + cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf) + build_dir = os.path.join(root_rev, "build") + return CppBenchmarkRunner(cmake_def.build(build_dir), **kwargs) + + +class StaticBenchmarkRunner(BenchmarkRunner): + """ Run suites from a (static) set of suites. """ + + def __init__(self, suites, **kwargs): + self._suites = suites + super().__init__(**kwargs) + + @property + def suites(self): + suite_fn = regex_filter(self.suite_filter) + benchmark_fn = regex_filter(self.benchmark_filter) + + for suite in (s for s in self._suites if suite_fn(s.name)): + benchmarks = [b for b in suite.benchmarks if benchmark_fn(b.name)] + yield BenchmarkSuite(suite.name, benchmarks) + + @classmethod + def is_json_result(cls, path_or_str): + builder = None + try: + builder = cls.from_json(path_or_str) + except BaseException: + pass + + return builder is not None + + @staticmethod + def from_json(path_or_str, **kwargs): + # breaks recursive imports + from ..utils.codec import BenchmarkRunnerCodec + path_or_str, json_load = (open(path_or_str), json.load) \ + if os.path.isfile(path_or_str) else (path_or_str, json.loads) + return BenchmarkRunnerCodec.decode(json_load(path_or_str), **kwargs) + + def __repr__(self): + return f"BenchmarkRunner[suites={list(self.suites)}]" + class CppBenchmarkRunner(BenchmarkRunner): - def __init__(self, build): + """ Run suites from a CMakeBuild. """ + + def __init__(self, build, **kwargs): """ Initialize a CppBenchmarkRunner. """ self.build = build + super().__init__(**kwargs) @property def suites_binaries(self): @@ -52,9 +133,9 @@ def suites_binaries(self): glob_expr = os.path.join(self.build.binaries_dir, "*-benchmark") return {os.path.basename(b): b for b in glob.glob(glob_expr)} - def suite(self, name, suite_bin, benchmark_filter): + def suite(self, name, suite_bin): """ Returns the resulting benchmarks for a given suite. """ - suite_cmd = GoogleBenchmarkCommand(suite_bin, benchmark_filter) + suite_cmd = GoogleBenchmarkCommand(suite_bin, self.benchmark_filter) # Ensure there will be data benchmark_names = suite_cmd.list_benchmarks() @@ -65,9 +146,10 @@ def suite(self, name, suite_bin, benchmark_filter): benchmarks = GoogleBenchmark.from_json(results.get("benchmarks")) return BenchmarkSuite(name, benchmarks) - def suites(self, suite_filter=None, benchmark_filter=None): + @property + def suites(self): """ Returns all suite for a runner. 
""" - suite_matcher = regex_filter(suite_filter) + suite_matcher = regex_filter(self.suite_filter) suite_and_binaries = self.suites_binaries for suite_name in suite_and_binaries: @@ -76,8 +158,7 @@ def suites(self, suite_filter=None, benchmark_filter=None): continue suite_bin = suite_and_binaries[suite_name] - suite = self.suite(suite_name, suite_bin, - benchmark_filter=benchmark_filter) + suite = self.suite(suite_name, suite_bin) # Filter may exclude all benchmarks if not suite: @@ -85,30 +166,3 @@ def suites(self, suite_filter=None, benchmark_filter=None): continue yield suite - - @staticmethod - def from_rev_or_path(src, root, rev_or_path, cmake_conf): - """ Returns a CppBenchmarkRunner from a path or a git revision. - - First, it checks if `rev_or_path` points to a valid CMake build - directory. If so, it creates a CppBenchmarkRunner with this existing - CMakeBuild. - - Otherwise, it assumes `rev_or_path` is a revision and clone/checkout - the given revision and create a fresh CMakeBuild. - """ - build = None - if CMakeBuild.is_build_dir(rev_or_path): - build = CMakeBuild.from_path(rev_or_path) - else: - root_rev = os.path.join(root, rev_or_path) - os.mkdir(root_rev) - - clone_dir = os.path.join(root_rev, "arrow") - # Possibly checkout the sources at given revision, no need to - # perform cleanup on cloned repository as root_rev is reclaimed. - src_rev, _ = src.at_revision(rev_or_path, clone_dir) - cmake_def = CppCMakeDefinition(src_rev.cpp, cmake_conf) - build = cmake_def.build(os.path.join(root_rev, "build")) - - return CppBenchmarkRunner(build) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 4fa88966080..b7b077ab87c 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -24,7 +24,7 @@ from tempfile import mkdtemp, TemporaryDirectory from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD -from .benchmark.runner import CppBenchmarkRunner +from .benchmark.runner import BenchmarkRunner from .lang.cpp import CppCMakeDefinition, CppConfiguration from .utils.codec import JsonEncoder from .utils.logger import logger, ctx as log_ctx @@ -167,7 +167,77 @@ def benchmark(ctx): pass -@benchmark.command(name="diff", short_help="Run the C++ benchmark suite") +@benchmark.command(name="run", short_help="Run benchmark suite") +@click.option("--src", metavar="", show_default=True, + default=ArrowSources.find(), + callback=validate_arrow_sources, + help="Specify Arrow source directory") +@click.option("--suite-filter", metavar="", show_default=True, + type=str, default=None, help="Regex filtering benchmark suites.") +@click.option("--benchmark-filter", metavar="", show_default=True, + type=str, default=DEFAULT_BENCHMARK_FILTER, + help="Regex filtering benchmark suites.") +@click.option("--preserve", type=bool, default=False, show_default=True, + is_flag=True, help="Preserve workspace for investigation.") +@click.option("--output", metavar="", + type=click.File("w", encoding="utf8"), default="-", + help="Capture output result into file.") +@click.option("--cmake-extras", type=str, multiple=True, + help="Extra flags/options to pass to cmake invocation. " + "Can be stacked") +@click.argument("baseline", metavar="[]]", default="master", + required=False) +@click.pass_context +def benchmark_run(ctx, src, preserve, suite_filter, benchmark_filter, + output, cmake_extras, baseline): + """ Run benchmark suite. + + This command will run the benchmark suite for a single build. This is + used to capture (and/or publish) the results. 
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py
index 4fa88966080..b7b077ab87c 100644
--- a/dev/archery/archery/cli.py
+++ b/dev/archery/archery/cli.py
@@ -24,7 +24,7 @@ from tempfile import mkdtemp, TemporaryDirectory
 
 from .benchmark.compare import RunnerComparator, DEFAULT_THRESHOLD
-from .benchmark.runner import CppBenchmarkRunner
+from .benchmark.runner import BenchmarkRunner
 from .lang.cpp import CppCMakeDefinition, CppConfiguration
 from .utils.codec import JsonEncoder
 from .utils.logger import logger, ctx as log_ctx
@@ -167,7 +167,77 @@ def benchmark(ctx):
     pass
 
 
-@benchmark.command(name="diff", short_help="Run the C++ benchmark suite")
+@benchmark.command(name="run", short_help="Run benchmark suite")
+@click.option("--src", metavar="<arrow_src>", show_default=True,
+              default=ArrowSources.find(),
+              callback=validate_arrow_sources,
+              help="Specify Arrow source directory")
+@click.option("--suite-filter", metavar="<regex>", show_default=True,
+              type=str, default=None, help="Regex filtering benchmark suites.")
+@click.option("--benchmark-filter", metavar="<regex>", show_default=True,
+              type=str, default=DEFAULT_BENCHMARK_FILTER,
+              help="Regex filtering benchmarks.")
+@click.option("--preserve", type=bool, default=False, show_default=True,
+              is_flag=True, help="Preserve workspace for investigation.")
+@click.option("--output", metavar="<output>",
+              type=click.File("w", encoding="utf8"), default="-",
+              help="Capture output result into file.")
+@click.option("--cmake-extras", type=str, multiple=True,
+              help="Extra flags/options to pass to cmake invocation. "
+              "Can be stacked")
+@click.argument("baseline", metavar="[<baseline>]", default="master",
+                required=False)
+@click.pass_context
+def benchmark_run(ctx, src, preserve, suite_filter, benchmark_filter,
+                  output, cmake_extras, baseline):
+    """ Run benchmark suite.
+
+    This command will run the benchmark suite for a single build. This is
+    used to capture (and/or publish) the results.
+
+    The caller can optionally specify a target which is either a git revision
+    (commit, tag, special values like HEAD) or a CMake build directory.
+
+
+    When a commit is referenced, a local clone of the arrow sources (specified
+    via --src) is performed and the proper branch is created. This is done in
+    a temporary directory which can be left intact with the `--preserve` flag.
+
+    The special token "WORKSPACE" is reserved to specify the current git
+    workspace. This implies that no clone will be performed.
+
+    Examples:
+
+    \b
+    # Run the benchmarks on the current git workspace
+    \b
+    archery benchmark run
+
+    \b
+    # Run the benchmarks on the previous commit
+    \b
+    archery benchmark run HEAD~1
+
+    \b
+    # Run the benchmarks and save the results into `run.json`
+    \b
+    archery benchmark run --output=run.json
+    """
+    with tmpdir(preserve) as root:
+        logger.debug(f"Running benchmark {baseline}")
+
+        conf = CppConfiguration(
+            build_type="release", with_tests=True, with_benchmarks=True,
+            with_python=False, cmake_extras=cmake_extras)
+
+        runner_base = BenchmarkRunner.from_rev_or_path(
+            src, root, baseline, conf,
+            suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+
+        json.dump(runner_base, output, cls=JsonEncoder)
+
+
+@benchmark.command(name="diff", short_help="Compare benchmark suites")
 @click.option("--src", metavar="<arrow_src>", show_default=True,
               default=ArrowSources.find(),
               callback=validate_arrow_sources,
               help="Specify Arrow source directory")
@@ -182,6 +252,9 @@
 @click.option("--threshold", type=float, default=DEFAULT_THRESHOLD,
               show_default=True,
               help="Regression failure threshold in percentage.")
+@click.option("--output", metavar="<output>",
+              type=click.File("w", encoding="utf8"), default="-",
+              help="Capture output result into file.")
 @click.option("--cmake-extras", type=str, multiple=True,
               help="Extra flags/options to pass to cmake invocation. "
               "Can be stacked")
@@ -191,7 +264,7 @@
               required=False)
 @click.pass_context
 def benchmark_diff(ctx, src, preserve, suite_filter, benchmark_filter,
-                   threshold, cmake_extras, contender, baseline):
+                   threshold, output, cmake_extras, contender, baseline):
     """ Compare (diff) benchmark runs.
 
     This command acts like git-diff but for benchmark results.
@@ -245,6 +318,22 @@ def benchmark_diff(ctx, src, preserve, suite_filter, benchmark_filter,
     \b
     archery benchmark diff --suite-filter="^arrow-compute-aggregate" \\
         --benchmark-filter="(Sum|Mean)Kernel"
+
+    \b
+    # Capture the result in file `result.json`
+    \b
+    archery benchmark diff --output=result.json
+    \b
+    # Equivalently, with no stdout clutter.
+    archery --quiet benchmark diff > result.json
+
+    \b
+    # Compare with cached results from `archery benchmark run`
+    \b
+    archery benchmark run --output=run.json HEAD~1
+    \b
+    # This should not recompute the benchmarks from run.json
+    archery --quiet benchmark diff WORKSPACE run.json > result.json
     """
     with tmpdir(preserve) as root:
         logger.debug(f"Comparing {contender} (contender) with "
@@ -254,18 +343,18 @@ def benchmark_diff(ctx, src, preserve, suite_filter, benchmark_filter,
             build_type="release", with_tests=True, with_benchmarks=True,
             with_python=False, cmake_extras=cmake_extras)
 
-        runner_cont = CppBenchmarkRunner.from_rev_or_path(
-            src, root, contender, conf)
-        runner_base = CppBenchmarkRunner.from_rev_or_path(
-            src, root, baseline, conf)
-
-        runner_comp = RunnerComparator(runner_cont, runner_base, threshold)
-        comparisons = runner_comp.comparisons(suite_filter, benchmark_filter)
+        runner_cont = BenchmarkRunner.from_rev_or_path(
+            src, root, contender, conf,
+            suite_filter=suite_filter, benchmark_filter=benchmark_filter)
+        runner_base = BenchmarkRunner.from_rev_or_path(
+            src, root, baseline, conf,
+            suite_filter=suite_filter, benchmark_filter=benchmark_filter)
 
         regressions = 0
-        for comparator in comparisons:
+        runner_comp = RunnerComparator(runner_cont, runner_base, threshold)
+        for comparator in runner_comp.comparisons:
             regressions += comparator.regression
-            print(json.dumps(comparator, cls=JsonEncoder))
+            json.dump(comparator, output, cls=JsonEncoder)
 
         sys.exit(regressions)
diff --git a/dev/archery/archery/utils/codec.py b/dev/archery/archery/utils/codec.py
index 612f2df5605..abb91a075b4 100644
--- a/dev/archery/archery/utils/codec.py
+++ b/dev/archery/archery/utils/codec.py
@@ -18,26 +18,85 @@
 
 import json
 
+from ..benchmark.core import Benchmark, BenchmarkSuite
+from ..benchmark.runner import BenchmarkRunner, StaticBenchmarkRunner
 from ..benchmark.compare import BenchmarkComparator
 
 
 class JsonEncoder(json.JSONEncoder):
     def default(self, o):
+        if isinstance(o, Benchmark):
+            return BenchmarkCodec.encode(o)
+
+        if isinstance(o, BenchmarkSuite):
+            return BenchmarkSuiteCodec.encode(o)
+
+        if isinstance(o, BenchmarkRunner):
+            return BenchmarkRunnerCodec.encode(o)
+
         if isinstance(o, BenchmarkComparator):
-            comparator = {
-                "benchmark": o.name,
-                "change": o.change,
-                "regression": o.regression,
-                "baseline": o.baseline.value,
-                "contender": o.contender.value,
-                "unit": o.unit,
-                "less_is_better": o.less_is_better,
-            }
-
-            suite_name = o.suite_name
-            if suite_name:
-                comparator["suite"] = suite_name
-
-            return comparator
+            return BenchmarkComparatorCodec.encode(o)
 
         return json.JSONEncoder.default(self, o)
+
+
+class BenchmarkCodec:
+    @staticmethod
+    def encode(b):
+        return {
+            "name": b.name,
+            "unit": b.unit,
+            "less_is_better": b.less_is_better,
+            "values": b.values.tolist(),
+        }
+
+    @staticmethod
+    def decode(dct, **kwargs):
+        return Benchmark(**dct, **kwargs)
+
+
+class BenchmarkSuiteCodec:
+    @staticmethod
+    def encode(bs):
+        return {
+            "name": bs.name,
+            "benchmarks": [BenchmarkCodec.encode(b) for b in bs.benchmarks]
+        }
+
+    @staticmethod
+    def decode(dct, **kwargs):
+        benchmarks = [BenchmarkCodec.decode(b)
+                      for b in dct.pop("benchmarks", [])]
+        return BenchmarkSuite(benchmarks=benchmarks, **dct, **kwargs)
+
+
+class BenchmarkRunnerCodec:
+    @staticmethod
+    def encode(br):
+        return {"suites": [BenchmarkSuiteCodec.encode(s) for s in br.suites]}
+
+    @staticmethod
+    def decode(dct, **kwargs):
+        suites = [BenchmarkSuiteCodec.decode(s)
+                  for s in dct.pop("suites", [])]
+        return StaticBenchmarkRunner(suites=suites, **dct, **kwargs)
+
+
+class BenchmarkComparatorCodec:
+    @staticmethod
+    def encode(bc):
+        comparator = {
+            "benchmark": bc.name,
+            "change": bc.change,
+            "regression": bc.regression,
+            "baseline": bc.baseline.value,
+            "contender": bc.contender.value,
+            "unit": bc.unit,
+            "less_is_better": bc.less_is_better,
+        }
+
+        suite_name = bc.suite_name
+        if suite_name:
+            comparator["suite"] = suite_name
+
+        return comparator
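The encode/decode pairs above are symmetric, which is what lets `StaticBenchmarkRunner.from_json` replay a previous run; a minimal round-trip sketch (the `runner` variable is assumed to be any `BenchmarkRunner`, e.g. one obtained via `from_rev_or_path`):

.. code-block:: python

   import json

   from archery.benchmark.runner import StaticBenchmarkRunner
   from archery.utils.codec import BenchmarkRunnerCodec, JsonEncoder

   # Encoding iterates `runner.suites`, so for a CppBenchmarkRunner this
   # is the point where the benchmarks actually execute.
   payload = json.dumps(runner, cls=JsonEncoder)

   # Decoding always yields a StaticBenchmarkRunner, which serves the
   # memoized suites without rebuilding or re-running anything.
   replayed = BenchmarkRunnerCodec.decode(json.loads(payload))
   assert isinstance(replayed, StaticBenchmarkRunner)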
diff --git a/docs/source/developers/benchmarks.rst b/docs/source/developers/benchmarks.rst
index d0e6f1b8aa4..2ae767bdda6 100644
--- a/docs/source/developers/benchmarks.rst
+++ b/docs/source/developers/benchmarks.rst
@@ -42,12 +42,31 @@ the module files but uses the actual sources.
    # optional: enable bash/zsh autocompletion
    eval "$(_ARCHERY_COMPLETE=source archery)"
 
+Running the benchmark suite
+===========================
+
+The benchmark suites can be run with the ``benchmark run`` sub-command.
+
+.. code-block:: shell
+
+   # Run benchmarks in the current git workspace
+   archery benchmark run
+   # Store the results in a file
+   archery benchmark run --output=run.json
+
+Sometimes, it is required to pass custom CMake flags, e.g.
+
+.. code-block:: shell
+
+   export CC=clang-8 CXX=clang++-8
+   archery benchmark run --cmake-extras="-DARROW_USE_SIMD=ON"
+
 Comparison
 ==========
 
 One goal with benchmarking is to detect performance regressions. To this end,
 ``archery`` implements a benchmark comparison facility via the ``benchmark
-diff`` command.
+diff`` sub-command.
 
 In the default invocation, it will compare the current source (known as the
 current workspace in git) with local master branch.
@@ -59,8 +78,8 @@ Iterating efficiently
 ~~~~~~~~~~~~~~~~~~~~~
 
 Iterating with benchmark development can be a tedious process due to long
-build time and long run times. ``archery benchmark diff`` provides 2 methods
-to reduce this overhead.
+build times and long run times. Multiple tricks can be used with
+``archery benchmark diff`` to reduce this overhead.
 
 First, the benchmark command supports comparing existing build
 directories. This can be paired with the ``--preserve`` flag to
 avoid rebuilding sources from zero.
@@ -77,7 +96,18 @@ avoid rebuilding sources from zero.
    # Re-run benchmark in the previously created build directory.
    archery benchmark diff /tmp/arrow-bench*/{WORKSPACE,master}/build
 
-Second, the benchmark command supports filtering suites (``--suite-filter``)
+Second, a benchmark run result can be saved in a json file. This avoids not
+only rebuilding the sources, but also re-executing the (sometimes) heavy
+benchmarks. This technique can be used as a poor man's cache.
+
+.. code-block:: shell
+
+   # Run the benchmarks on a given commit and save the result
+   archery benchmark run --output=run-head-1.json HEAD~1
+   # Compare the previously captured result with HEAD
+   archery benchmark diff HEAD run-head-1.json
+
+Third, the benchmark command supports filtering suites (``--suite-filter``)
 and benchmarks (``--benchmark-filter``), both options support regular
 expressions.
@@ -89,8 +119,6 @@ expressions.
        --suite-filter=compute-aggregate --benchmark-filter=Kernel \
        /tmp/arrow-bench*/{WORKSPACE,master}/build
 
-Both methods can be combined.
-
 Regression detection
 ====================
@@ -110,6 +138,16 @@ Writing a benchmark
    does not fit in memory (L2/L3), the benchmark will be memory bound
    instead of CPU bound. In this case, the input can be downsized.
 
+4. By default, Google's benchmark library reports the cputime metric, which
+   is the total time spent on the CPU by all threads of the process. By
+   contrast, realtime is the wall-clock time, i.e. the difference
+   end_time - start_time. In a single-threaded benchmark, cputime is
+   preferable since it is less affected by context switching. In a
+   multi-threaded benchmark, cputime gives misleading results since it is
+   inflated by the number of threads and can be far off the realtime. Thus,
+   if the benchmark is multi-threaded, it might be better to use
+   ``UseRealTime()``, see this `example `.
+
 Scripting
 =========
 
@@ -125,3 +163,10 @@ output. This can be controlled/avoided with the ``--quiet`` option, e.g.
    {"benchmark": "BenchSumKernel/32768/0", "change": -0.6498, "regression": true, ...
    {"benchmark": "BenchSumKernel/32768/1", "change": 0.01553, "regression": false, ...
    ...
+
+or the ``--output`` option can be used, e.g.
+
+.. code-block:: shell
+
+   archery benchmark diff --benchmark-filter=Kernel --output=compare.json
+   ...
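Downstream tooling can then consume the captured file directly. A minimal sketch (the ``iter_comparators`` helper and the ``compare.json`` path are illustrative only, not part of archery; the decoder loop tolerates both newline-separated and concatenated JSON objects):

.. code-block:: python

   import json

   def iter_comparators(path):
       """ Yield each comparator object stored in an archery output file. """
       decoder = json.JSONDecoder()
       with open(path) as f:
           buf = f.read()
       pos, end = 0, len(buf)
       while pos < end:
           # skip whitespace between consecutive objects
           while pos < end and buf[pos].isspace():
               pos += 1
           if pos == end:
               break
           obj, pos = decoder.raw_decode(buf, pos)
           yield obj

   regressions = [c for c in iter_comparators("compare.json")
                  if c["regression"]]
   print(f"{len(regressions)} regression(s) detected")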