diff --git a/cpp/build-support/convert-benchmark.py b/cpp/build-support/convert-benchmark.py
new file mode 100755
index 00000000000..dcc7332ffc7
--- /dev/null
+++ b/cpp/build-support/convert-benchmark.py
@@ -0,0 +1,299 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from functools import cmp_to_key, lru_cache
+from itertools import filterfalse, groupby, tee
+import json
+import six
+from socket import gethostname
+import subprocess
+import sys
+from uuid import getnode
+
+
+def memoize(fn):
+    return lru_cache(maxsize=1)(fn)
+
+
+def partition(pred, iterable):
+    # adapted from python's examples
+    t1, t2 = tee(iterable)
+    return list(filter(pred, t1)), list(filterfalse(pred, t2))
+
+
+# Taken from merge_arrow_pr.py
+def run_cmd(cmd):
+    if isinstance(cmd, six.string_types):
+        cmd = cmd.split(' ')
+
+    try:
+        output = subprocess.check_output(cmd)
+    except subprocess.CalledProcessError as e:
+        # this avoids hiding the stdout / stderr of failed processes
+        print('Command failed: %s' % cmd)
+        print('With output:')
+        print('--------------')
+        print(e.output)
+        print('--------------')
+        raise e
+
+    if isinstance(output, six.binary_type):
+        output = output.decode('utf-8')
+    return output.rstrip()
+
+
+class Context:
+    """ Represents the runtime environment """
+
+    def __init__(self, date=None, executable=None, **kwargs):
+        self.date = date
+        self.executable = executable
+
+    @property
+    def host(self):
+        host = {
+            "hostname": gethostname(),
+            # Not sure if we should leak this.
+            "mac_address": getnode(),
+        }
+        return host
+
+    @property
+    def git(self):
+        head = run_cmd("git rev-parse HEAD")
+        # %ai: author date, ISO 8601-like format
+        fmt = "%ai"
+        # note: no quotes around the format; run_cmd does not go through a
+        # shell, so quotes would end up verbatim in git's output
+        timestamp = run_cmd(f"git log -1 --pretty={fmt} {head}")
+        branch = run_cmd("git rev-parse --abbrev-ref HEAD")
+        git_info = {
+            "git_commit_timestamp": timestamp,
+            "git_hash": head,
+            "git_branch": branch,
+        }
+        return git_info
+
+    @property
+    def toolchain(self):
+        # TODO parse local CMake generated info to extract compile flags and
+        # arrow features
+        deps = {}
+        toolchain = {
+            "language_implementation_version": "c++11",
+            "dependencies": deps,
+        }
+        return toolchain
+
+    def as_arrow(self):
+        ctx = {
+            "benchmark_language": "C++",
+            "run_timestamp": self.date,
+        }
+
+        for extra in (self.host, self.git, self.toolchain):
+            ctx.update(extra)
+
+        return ctx
+
+    @classmethod
+    def from_json(cls, version, payload):
+        return cls(**payload)
+
+
+class BenchmarkObservation:
+    def __init__(self, version, **kwargs):
+        self._name = kwargs.get("name")
+        self.version = version
+        self.real_time = kwargs.get("real_time")
+        self.cpu_time = kwargs.get("cpu_time")
+        self.time_unit = kwargs.get("time_unit")
+        self.size = kwargs.get("size")
+        self.bytes_per_second = kwargs.get("bytes_per_second")
+
+    @property
+    def is_mean(self):
+        return self._name.endswith("_mean")
+
+    @property
+    def is_median(self):
+        return self._name.endswith("_median")
+
+    @property
+    def is_stddev(self):
+        return self._name.endswith("_stddev")
+
+    @property
+    def is_agg(self):
+        return self.is_mean or self.is_median or self.is_stddev
+
+    @property
+    def is_realtime(self):
+        return self.name.find("/realtime") != -1
+
+    @property
+    @memoize
+    def name(self):
+        name = self._name
+        return name.rsplit("_", maxsplit=1)[0] if self.is_agg else name
+
+    @property
+    def value(self):
+        """ Return the benchmark value."""
+        if self.size:
+            return self.bytes_per_second
+        return self.real_time if self.is_realtime else self.cpu_time
+
+    @property
+    def unit(self):
+        if self.size:
+            return "bytes_per_second"
+        return self.time_unit
+
+    def __str__(self):
+        return f"BenchmarkObservation[name={self.name}]"
+
+
+class BenchmarkSuite:
+    def __init__(self, name, version, runs):
+        self.name = name
+        self.version = version
+        # exclude google benchmark aggregate artifacts
+        aggs, runs = partition(lambda b: b.is_agg, runs)
+        self.runs = sorted(runs, key=lambda b: b.value)
+        self.values = [b.value for b in self.runs]
+        self.aggregates = aggs
+
+    @property
+    def n_runs(self):
+        return len(self.runs)
+
+    @property
+    @memoize
+    def mean(self):
+        maybe_mean = [b for b in self.aggregates if b.is_mean]
+        if maybe_mean:
+            return maybe_mean[0].value
+        # fallback
+        return sum(self.values) / self.n_runs
+
+    @property
+    @memoize
+    def median(self):
+        maybe_median = [b for b in self.aggregates if b.is_median]
+        if maybe_median:
+            return maybe_median[0].value
+        # fallback
+        return self.runs[int(self.n_runs / 2)].value
+
+    @property
+    @memoize
+    def stddev(self):
+        maybe_stddev = [b for b in self.aggregates if b.is_stddev]
+        if maybe_stddev:
+            return maybe_stddev[0].value
+
+        sum_diff = sum([(val - self.mean)**2 for val in self.values])
+        return (sum_diff / (self.n_runs - 1))**0.5 if self.n_runs > 1 else 0.0
+
+    @property
+    def min(self):
+        return self.values[0]
+
+    @property
+    def max(self):
+        return self.values[-1]
+
+    def quartile(self, q):
+        return self.values[int(q * self.n_runs / 4)]
+
+    @property
+    def q1(self):
+        return self.quartile(1)
+
+    @property
+    def q3(self):
+        return self.quartile(3)
+
+    @property
+    def parameters(self):
+        """ Extract parameters from the benchmark's name. """
+        def parse_param(idx, param):
+            k_v = param.split(":")
+            # nameless parameters are transformed into positional names
+            name = k_v[0] if len(k_v) > 1 else f"arg{idx}"
+            return name, k_v[-1]
+
+        params = enumerate(self.name.split("/")[1:])
+        named_params = [parse_param(idx, p) for idx, p in params if p]
+        return {k: v for k, v in named_params}
+
+    def as_arrow(self):
+        n_runs = self.n_runs
+        run = {
+            "benchmark_name": self.name,
+            "value": self.mean,
+            "val_min": self.min,
+            "val_q1": self.q1,
+            "median": self.median,
+            "val_q3": self.q3,
+            "val_max": self.max,
+            "std_dev": self.stddev,
+            "n_obs": n_runs,
+        }
+
+        params = self.parameters
+        if params:
+            run["parameter_values"] = params
+
+        return run
+
+    def __str__(self):
+        return f"BenchmarkSuite[name={self.name},runs={len(self.runs)}]"
+
+    @classmethod
+    def from_json(cls, version, payload):
+        def group_key(x):
+            return x.name
+
+        benchmarks = map(lambda x: BenchmarkObservation(version, **x), payload)
+        groups = groupby(sorted(benchmarks, key=group_key), group_key)
+        return [cls(k, version, list(bs)) for k, bs in groups]
+
+
+def as_arrow(version, payload):
+    suites = BenchmarkSuite.from_json(version, payload.get("benchmarks", {}))
+    versions = {s.name: s.version for s in suites}
+    context = Context.from_json(version, payload.get("context", {}))
+    converted = {
+        "context": context.as_arrow(),
+        "benchmark_version": versions,
+        "benchmarks": list(map(lambda s: s.as_arrow(), suites))
+    }
+    return converted
+
+
+def main():
+    in_fd, out_fd = sys.stdin, sys.stdout
+    version = sys.argv[1]
+
+    benchmark_json = json.load(in_fd)
+    converted = as_arrow(version, benchmark_json)
+    json.dump(converted, out_fd)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/cpp/build-support/run-benchmark.sh b/cpp/build-support/run-benchmark.sh
new file mode 100755
index 00000000000..622c34d8b6c
--- /dev/null
+++ b/cpp/build-support/run-benchmark.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -eux
+
+# See https://github.com/google/benchmark/blob/6a5c379ca/tools/gbench/report.py#L39
+# https://github.com/scipy/scipy/blob/c3fa90dcfcaef71/scipy/stats/stats.py#L4957
+: "${BENCHMARK_REPETITIONS:=20}"
+
+PWD=$(cd $(dirname "$BASH_SOURCE"); pwd)
+
+convert_benchmark() {
+  local version=$1
+
+  "${PWD}/convert-benchmark.py" "${version}"
+}
+
+run_benchmark() {
+  local bin=$1
+  shift
+
+  "${bin}" \
+    --benchmark_format=json \
+    --benchmark_repetitions=${BENCHMARK_REPETITIONS} \
+    "$@"
+}
+
+main() {
+  local build_dir=$1
+  local benchmark_bin=$2
+  local benchmark_src=$3
+  shift; shift; shift
+
+  local benchmark_dir=${build_dir}/benchmarks
+  mkdir -p "${benchmark_dir}"
+
+  # Extract the benchmark's version by hashing the original source file. This
+  # is not perfect, but a good enough proxy: any change made to the benchmark
+  # file invalidates previous runs.
+  local v="00000000000000000000000000000000"
+  if [ -e "${benchmark_src}" ]; then
+    v=$(md5sum "${benchmark_src}" | cut -d' ' -f1)
+  fi
+
+  local benchmark_name; benchmark_name=$(basename "${benchmark_bin}")
+  local orig_result=${benchmark_dir}/${benchmark_name}.json.original
+  local result=${benchmark_dir}/${benchmark_name}.json
+
+  # pipe through the converter, but keep the original output for debugging
+  run_benchmark "${benchmark_bin}" "$@" | \
+    tee "${orig_result}" | \
+    convert_benchmark "${v}" > "${result}"
+}
+
+main "$@"
diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake
index 90944e61d3d..b3997ba69e3 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -389,7 +389,9 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME)
     set(BENCHMARK_NAME "${ARG_PREFIX}-${BENCHMARK_NAME}")
   endif()
 
-  if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME}.cc)
+  set(BENCHMARK_SOURCE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_BENCHMARK_NAME}.cc)
+
+  if(EXISTS ${BENCHMARK_SOURCE_PATH})
     # This benchmark has a corresponding .cc file, set it up as an executable.
     set(BENCHMARK_PATH "${EXECUTABLE_OUTPUT_PATH}/${BENCHMARK_NAME}")
     add_executable(${BENCHMARK_NAME} "${REL_BENCHMARK_NAME}.cc")
@@ -401,7 +403,7 @@
     target_link_libraries(${BENCHMARK_NAME} PRIVATE ${ARROW_BENCHMARK_LINK_LIBS})
   endif()
   add_dependencies(benchmark ${BENCHMARK_NAME})
-  set(NO_COLOR "--color_print=false")
+  set(NO_COLOR "--benchmark_color=false")
 
   if(ARG_EXTRA_LINK_LIBS)
     target_link_libraries(${BENCHMARK_NAME} PRIVATE ${ARG_EXTRA_LINK_LIBS})
@@ -444,10 +446,10 @@
   endif()
 
   add_test(${BENCHMARK_NAME}
-    ${BUILD_SUPPORT_DIR}/run-test.sh
+    ${BUILD_SUPPORT_DIR}/run-benchmark.sh
     ${CMAKE_BINARY_DIR}
-    benchmark
     ${BENCHMARK_PATH}
+    ${BENCHMARK_SOURCE_PATH}
     ${NO_COLOR})
   set_property(TEST ${BENCHMARK_NAME} APPEND PROPERTY LABELS ${ARG_LABELS})
 endfunction()
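
Not part of the patch itself, just a rough sketch of how the pieces chain together once the
CMake change is in place; the benchmark name and paths below are hypothetical placeholders.

    # Via CTest, each registered benchmark now goes through run-benchmark.sh.
    ctest -R builder-benchmark

    # Repetitions can be tuned through the environment (default is 20).
    BENCHMARK_REPETITIONS=5 ctest -R builder-benchmark

    # Equivalent manual invocation: build dir, benchmark binary, benchmark source,
    # then any extra flags forwarded to the binary.
    cpp/build-support/run-benchmark.sh \
        /path/to/build \
        /path/to/build/release/arrow-builder-benchmark \
        cpp/src/arrow/array-builder-benchmark.cc \
        --benchmark_color=false

    # Raw google-benchmark JSON is kept at
    #   /path/to/build/benchmarks/arrow-builder-benchmark.json.original
    # and the converted output at
    #   /path/to/build/benchmarks/arrow-builder-benchmark.json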