From 1b5135ebd0a9c8f42066f54288c94d51e12d4bc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Tue, 7 May 2019 13:01:57 -0400 Subject: [PATCH 01/14] Refactor Builder benchmarks - Ensure that Builder benchmarks are working on inputs of the same (approximately) size in bytes. This allows relative comparison between builders. - Renamed benchmarks by prefixing `Regression`. - Fixed extra string copy in BuildStringDictionary. --- cpp/src/arrow/builder-benchmark.cc | 361 +++++++++++++---------------- 1 file changed, 159 insertions(+), 202 deletions(-) diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc index 6ad860af000..b3ddf46030e 100644 --- a/cpp/src/arrow/builder-benchmark.cc +++ b/cpp/src/arrow/builder-benchmark.cc @@ -29,209 +29,185 @@ #include "arrow/memory_pool.h" #include "arrow/testing/gtest_util.h" #include "arrow/util/bit-util.h" +#include "arrow/util/string_view.h" namespace arrow { -constexpr int64_t kFinalSize = 256; +using ValueType = int64_t; +using VectorType = std::vector; +constexpr int64_t kNumberOfElements = 256 * 512; -static void BM_BuildPrimitiveArrayNoNulls( - benchmark::State& state) { // NOLINT non-const reference - // 2 MiB block - std::vector data(256 * 1024, 100); - while (state.KeepRunning()) { - Int64Builder builder; - for (int i = 0; i < kFinalSize; i++) { - // Build up an array of 512 MiB in size - ABORT_NOT_OK(builder.AppendValues(data.data(), data.size(), nullptr)); - } - std::shared_ptr out; - ABORT_NOT_OK(builder.Finish(&out)); - } - state.SetBytesProcessed(state.iterations() * data.size() * sizeof(int64_t) * - kFinalSize); +static VectorType AlmostU8CompressibleVector() { + VectorType data(kNumberOfElements, 64); + + // Insert an element late in the game that does not fit in the 8bit + // representation. This forces AdaptiveIntBuilder's to resize. 
+ data[kNumberOfElements - 2] = 1L << 13; + + return data; } -static void BM_BuildVectorNoNulls( +constexpr int64_t kFinalSize = 256; +static VectorType kData = AlmostU8CompressibleVector(); +constexpr int64_t kBytesProcessPerRound = kNumberOfElements * sizeof(ValueType); +constexpr int64_t kBytesProcessed = kFinalSize * kBytesProcessPerRound; + +static const char* kBinaryString = "12345678"; +static arrow::util::string_view kBinaryView(kBinaryString); + +// This benchmarks acts as a reference to the native std::vector +// implementation. It appends kFinalSize chunks into a vector. +static void ReferenceBuildVectorNoNulls( benchmark::State& state) { // NOLINT non-const reference - // 2 MiB block - std::vector data(256 * 1024, 100); - while (state.KeepRunning()) { + for (auto _ : state) { std::vector builder; + for (int i = 0; i < kFinalSize; i++) { - // Build up an array of 512 MiB in size - builder.insert(builder.end(), data.cbegin(), data.cend()); + builder.insert(builder.end(), kData.cbegin(), kData.cend()); } } - state.SetBytesProcessed(state.iterations() * data.size() * sizeof(int64_t) * - kFinalSize); + + state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void BM_BuildAdaptiveIntNoNulls( +static void RegressionBuildPrimitiveArrayNoNulls( benchmark::State& state) { // NOLINT non-const reference - int64_t size = static_cast(std::numeric_limits::max()) * 256; - int64_t chunk_size = size / 8; - std::vector data(size); - for (int64_t i = 0; i < size; i++) { - data[i] = i; - } - while (state.KeepRunning()) { - AdaptiveIntBuilder builder; - for (int64_t i = 0; i < size; i += chunk_size) { - // Build up an array of 128 MiB in size - ABORT_NOT_OK(builder.AppendValues(data.data() + i, chunk_size, nullptr)); + for (auto _ : state) { + Int64Builder builder; + + for (int i = 0; i < kFinalSize; i++) { + ABORT_NOT_OK(builder.AppendValues(kData.data(), kData.size(), nullptr)); } + std::shared_ptr out; ABORT_NOT_OK(builder.Finish(&out)); } - 
state.SetBytesProcessed(state.iterations() * data.size() * sizeof(int64_t)); + + state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void BM_BuildAdaptiveIntNoNullsScalarAppend( +static void RegressionBuildAdaptiveIntNoNulls( benchmark::State& state) { // NOLINT non-const reference - int64_t size = static_cast(std::numeric_limits::max()) * 256; - std::vector data(size); - for (int64_t i = 0; i < size; i++) { - data[i] = i; - } - while (state.KeepRunning()) { + for (auto _ : state) { AdaptiveIntBuilder builder; - for (int64_t i = 0; i < size; i++) { - ABORT_NOT_OK(builder.Append(data[i])); - } - std::shared_ptr out; - ABORT_NOT_OK(builder.Finish(&out)); - } - state.SetBytesProcessed(state.iterations() * data.size() * sizeof(int64_t)); -} -static void BM_BuildAdaptiveUIntNoNulls( - benchmark::State& state) { // NOLINT non-const reference - int64_t size = static_cast(std::numeric_limits::max()) * 256; - int64_t chunk_size = size / 8; - std::vector data(size); - for (uint64_t i = 0; i < static_cast(size); i++) { - data[i] = i; - } - while (state.KeepRunning()) { - AdaptiveUIntBuilder builder; - for (int64_t i = 0; i < size; i += chunk_size) { - // Build up an array of 128 MiB in size - ABORT_NOT_OK(builder.AppendValues(data.data() + i, chunk_size, nullptr)); + for (int i = 0; i < kFinalSize; i++) { + ABORT_NOT_OK(builder.AppendValues(kData.data(), kData.size(), nullptr)); } + std::shared_ptr out; ABORT_NOT_OK(builder.Finish(&out)); } - state.SetBytesProcessed(state.iterations() * data.size() * sizeof(int64_t)); + + state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void BM_BuildAdaptiveUIntNoNullsScalarAppend( +static void RegressionBuildAdaptiveIntNoNullsScalarAppend( benchmark::State& state) { // NOLINT non-const reference - int64_t size = static_cast(std::numeric_limits::max()) * 256; - std::vector data(size); - for (uint64_t i = 0; i < static_cast(size); i++) { - data[i] = i; - } - while (state.KeepRunning()) { - 
AdaptiveUIntBuilder builder; - for (int64_t i = 0; i < size; i++) { - ABORT_NOT_OK(builder.Append(data[i])); + for (auto _ : state) { + AdaptiveIntBuilder builder; + + for (int i = 0; i < kFinalSize; i++) { + for (size_t j = 0; j < kData.size(); j++) { + ABORT_NOT_OK(builder.Append(kData[i])) + } } + std::shared_ptr out; ABORT_NOT_OK(builder.Finish(&out)); } - state.SetBytesProcessed(state.iterations() * data.size() * sizeof(int64_t)); + + state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void BM_BuildBooleanArrayNoNulls( +static void RegressionBuildBooleanArrayNoNulls( benchmark::State& state) { // NOLINT non-const reference - // 2 MiB block - std::vector data(2 * 1024 * 1024); - constexpr uint8_t bit_pattern = 0xcc; // 0b11001100 - uint64_t index = 0; - std::generate(data.begin(), data.end(), - [&]() -> uint8_t { return (bit_pattern >> ((index++) % 8)) & 1; }); - - while (state.KeepRunning()) { + + size_t n_bytes = kData.size() * sizeof(ValueType); + const uint8_t* data = reinterpret_cast(kData.data()); + + for (auto _ : state) { BooleanBuilder builder; + for (int i = 0; i < kFinalSize; i++) { - // Build up an array of 512 MiB in size - ABORT_NOT_OK(builder.AppendValues(data.data(), data.size())); + ABORT_NOT_OK(builder.AppendValues(data, n_bytes)); } + std::shared_ptr out; ABORT_NOT_OK(builder.Finish(&out)); } - state.SetBytesProcessed(state.iterations() * data.size() * kFinalSize); -} -static void BM_BuildBinaryArray(benchmark::State& state) { // NOLINT non-const reference - // About 160MB - const int64_t iterations = 1 << 24; - std::string value = "1234567890"; + state.SetBytesProcessed(state.iterations() * kBytesProcessed); +} +static void RegressionBuildBinaryArray( + benchmark::State& state) { // NOLINT non-const reference for (auto _ : state) { BinaryBuilder builder; - for (int64_t i = 0; i < iterations; i++) { - ABORT_NOT_OK(builder.Append(value)); + + for (int64_t i = 0; i < kFinalSize * kNumberOfElements; i++) { + 
ABORT_NOT_OK(builder.Append(kBinaryView)); } + std::shared_ptr out; ABORT_NOT_OK(builder.Finish(&out)); } - state.SetBytesProcessed(state.iterations() * iterations * value.size()); + + state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void BM_BuildChunkedBinaryArray( +static void RegressionBuildChunkedBinaryArray( benchmark::State& state) { // NOLINT non-const reference - // About 160MB - const int64_t iterations = 1 << 24; - std::string value = "1234567890"; + // 1MB chunks + const int32_t kChunkSize = 1 << 20; for (auto _ : state) { - // 1MB chunks - const int32_t chunksize = 1 << 20; - internal::ChunkedBinaryBuilder builder(chunksize); - for (int64_t i = 0; i < iterations; i++) { - ABORT_NOT_OK(builder.Append(reinterpret_cast(value.data()), - static_cast(value.size()))); + internal::ChunkedBinaryBuilder builder(kChunkSize); + + for (int64_t i = 0; i < kFinalSize * kNumberOfElements; i++) { + ABORT_NOT_OK(builder.Append(kBinaryView)); } + ArrayVector out; ABORT_NOT_OK(builder.Finish(&out)); } - state.SetBytesProcessed(state.iterations() * iterations * value.size()); + + state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void BM_BuildFixedSizeBinaryArray( +static void RegressionBuildFixedSizeBinaryArray( benchmark::State& state) { // NOLINT non-const reference - const int64_t iterations = 1 << 20; - const int width = 10; + auto type = fixed_size_binary(kBinaryView.size()); - auto type = fixed_size_binary(width); - const char value[width + 1] = "1234567890"; - - while (state.KeepRunning()) { + for (auto _ : state) { FixedSizeBinaryBuilder builder(type); - for (int64_t i = 0; i < iterations; i++) { - ABORT_NOT_OK(builder.Append(value)); + + for (int64_t i = 0; i < kFinalSize * kNumberOfElements; i++) { + ABORT_NOT_OK(builder.Append(kBinaryView)); } + std::shared_ptr out; ABORT_NOT_OK(builder.Finish(&out)); } - state.SetBytesProcessed(state.iterations() * iterations * width); + + state.SetBytesProcessed(state.iterations() 
* kBytesProcessed); } // ---------------------------------------------------------------------- // DictionaryBuilder benchmarks +size_t kDistinctElements = kNumberOfElements / 100; + // Testing with different distributions of integer values helps stress // the hash table's robustness. // Make a vector out of `n_distinct` sequential int values -template -static std::vector MakeSequentialIntDictFodder(int32_t n_values, - int32_t n_distinct) { +template +static std::vector MakeSequentialIntDictFodder() { std::default_random_engine gen(42); - std::vector values(n_values); + std::vector values(kNumberOfElements); { - std::uniform_int_distribution values_dist(0, n_distinct - 1); + std::uniform_int_distribution values_dist(0, kDistinctElements - 1); std::generate(values.begin(), values.end(), [&]() { return values_dist(gen); }); } return values; @@ -239,15 +215,15 @@ static std::vector MakeSequentialIntDictFodder(int32_t n_values, // Make a vector out of `n_distinct` int values with potentially colliding hash // entries as only their highest bits differ. 
-template -static std::vector MakeSimilarIntDictFodder(int32_t n_values, - int32_t n_distinct) { +template +static std::vector MakeSimilarIntDictFodder() { std::default_random_engine gen(42); - std::vector values(n_values); + std::vector values(kNumberOfElements); { - std::uniform_int_distribution values_dist(0, n_distinct - 1); + std::uniform_int_distribution values_dist(0, kDistinctElements - 1); auto max_int = std::numeric_limits::max(); - auto multiplier = static_cast(BitUtil::NextPower2(max_int / n_distinct / 2)); + auto multiplier = + static_cast(BitUtil::NextPower2(max_int / kDistinctElements / 2)); std::generate(values.begin(), values.end(), [&]() { return multiplier * values_dist(gen); }); } @@ -255,12 +231,11 @@ static std::vector MakeSimilarIntDictFodder(int32_t n_values, } // Make a vector out of `n_distinct` random int values -template -static std::vector MakeRandomIntDictFodder(int32_t n_values, - int32_t n_distinct) { +template +static std::vector MakeRandomIntDictFodder() { std::default_random_engine gen(42); - std::vector values_dict(n_distinct); - std::vector values(n_values); + std::vector values_dict(kDistinctElements); + std::vector values(kNumberOfElements); { std::uniform_int_distribution values_dist( @@ -269,19 +244,18 @@ static std::vector MakeRandomIntDictFodder(int32_t n_values, [&]() { return static_cast(values_dist(gen)); }); } { - std::uniform_int_distribution indices_dist(0, n_distinct - 1); + std::uniform_int_distribution indices_dist(0, kDistinctElements - 1); std::generate(values.begin(), values.end(), [&]() { return values_dict[indices_dist(gen)]; }); } return values; } -// Make a vector out of `n_distinct` string values -static std::vector MakeStringDictFodder(int32_t n_values, - int32_t n_distinct) { +// Make a vector out of `kDistinctElements` string values +static std::vector MakeStringDictFodder() { std::default_random_engine gen(42); - std::vector values_dict(n_distinct); - std::vector values(n_values); + std::vector 
values_dict(kDistinctElements); + std::vector values(kNumberOfElements); { auto it = values_dict.begin(); @@ -305,7 +279,7 @@ static std::vector MakeStringDictFodder(int32_t n_values, }); } { - std::uniform_int_distribution indices_dist(0, n_distinct - 1); + std::uniform_int_distribution indices_dist(0, kDistinctElements - 1); std::generate(values.begin(), values.end(), [&] { return values_dict[indices_dist(gen)]; }); } @@ -316,52 +290,61 @@ template static void BenchmarkScalarDictionaryArray( benchmark::State& state, // NOLINT non-const reference const std::vector& fodder) { - while (state.KeepRunning()) { + for (auto _ : state) { DictionaryBuilder builder(default_memory_pool()); - for (const auto value : fodder) { - ABORT_NOT_OK(builder.Append(value)); + + for (int64_t i = 0; i < kFinalSize; i++) { + for (const auto value : fodder) { + ABORT_NOT_OK(builder.Append(value)); + } } + std::shared_ptr out; ABORT_NOT_OK(builder.Finish(&out)); } - state.SetBytesProcessed(state.iterations() * fodder.size() * sizeof(Scalar)); + + state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void BM_BuildInt64DictionaryArrayRandom( +static void RegressionBuildInt64DictionaryArrayRandom( benchmark::State& state) { // NOLINT non-const reference - const auto fodder = MakeRandomIntDictFodder(10000, 100); + const auto fodder = MakeRandomIntDictFodder(); BenchmarkScalarDictionaryArray>(state, fodder); } -static void BM_BuildInt64DictionaryArraySequential( +static void RegressionBuildInt64DictionaryArraySequential( benchmark::State& state) { // NOLINT non-const reference - const auto fodder = MakeSequentialIntDictFodder(10000, 100); + const auto fodder = MakeSequentialIntDictFodder(); BenchmarkScalarDictionaryArray>(state, fodder); } -static void BM_BuildInt64DictionaryArraySimilar( +static void RegressionBuildInt64DictionaryArraySimilar( benchmark::State& state) { // NOLINT non-const reference - const auto fodder = MakeSimilarIntDictFodder(10000, 100); + const auto 
fodder = MakeSimilarIntDictFodder(); BenchmarkScalarDictionaryArray>(state, fodder); } -static void BM_BuildStringDictionaryArray( +static void RegressionBuildStringDictionaryArray( benchmark::State& state) { // NOLINT non-const reference - const auto fodder = MakeStringDictFodder(10000, 100); - auto type = binary(); + const auto fodder = MakeStringDictFodder(); auto fodder_size = - std::accumulate(fodder.begin(), fodder.end(), static_cast(0), + std::accumulate(fodder.begin(), fodder.end(), 0UL, [&](size_t acc, const std::string& s) { return acc + s.size(); }); - while (state.KeepRunning()) { + for (auto _ : state) { BinaryDictionaryBuilder builder(default_memory_pool()); - for (const auto& value : fodder) { - ABORT_NOT_OK(builder.Append(value)); + + for (int64_t i = 0; i < kFinalSize; i++) { + for (const auto& value : fodder) { + ABORT_NOT_OK(builder.Append(value)); + } } + std::shared_ptr out; ABORT_NOT_OK(builder.Finish(&out)); } - state.SetBytesProcessed(state.iterations() * fodder_size); + + state.SetBytesProcessed(state.iterations() * fodder_size * kFinalSize); } static void BM_ArrayDataConstructDestruct( @@ -383,51 +366,25 @@ static void BM_ArrayDataConstructDestruct( // ---------------------------------------------------------------------- // Benchmark declarations +// -static constexpr int32_t kRepetitions = 2; +BENCHMARK(ReferenceBuildVectorNoNulls); -BENCHMARK(BM_ArrayDataConstructDestruct); +BENCHMARK(RegressionBuildBooleanArrayNoNulls); + +BENCHMARK(RegressionBuildPrimitiveArrayNoNulls); +BENCHMARK(RegressionBuildAdaptiveIntNoNulls); +BENCHMARK(RegressionBuildAdaptiveIntNoNullsScalarAppend); + +BENCHMARK(RegressionBuildBinaryArray); +BENCHMARK(RegressionBuildChunkedBinaryArray); +BENCHMARK(RegressionBuildFixedSizeBinaryArray); -BENCHMARK(BM_BuildPrimitiveArrayNoNulls) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildVectorNoNulls) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); - 
-BENCHMARK(BM_BuildBooleanArrayNoNulls) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_BuildAdaptiveIntNoNulls) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildAdaptiveIntNoNullsScalarAppend) - ->Repetitions(3) - ->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildAdaptiveUIntNoNulls) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildAdaptiveUIntNoNullsScalarAppend) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_BuildBinaryArray)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildChunkedBinaryArray)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildFixedSizeBinaryArray)->MinTime(3.0)->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_BuildInt64DictionaryArrayRandom) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildInt64DictionaryArraySequential) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildInt64DictionaryArraySimilar) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_BuildStringDictionaryArray) - ->Repetitions(kRepetitions) - ->Unit(benchmark::kMicrosecond); +BENCHMARK(RegressionBuildInt64DictionaryArrayRandom); +BENCHMARK(RegressionBuildInt64DictionaryArraySequential); +BENCHMARK(RegressionBuildInt64DictionaryArraySimilar); +BENCHMARK(RegressionBuildStringDictionaryArray); + +BENCHMARK(BM_ArrayDataConstructDestruct); } // namespace arrow From b7911df3dcc135951d12b105b030c1afc08ed931 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Tue, 7 May 2019 14:29:05 -0400 Subject: [PATCH 02/14] Refactor Bitmap benchmarks - Use the same buffer size in all bitmap benchmarks. 
- Fix some reporting numbers --- cpp/src/arrow/util/bit-util-benchmark.cc | 121 +++++++++++------------ 1 file changed, 55 insertions(+), 66 deletions(-) diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc b/cpp/src/arrow/util/bit-util-benchmark.cc index fbe786726fa..f34b9bbdeca 100644 --- a/cpp/src/arrow/util/bit-util-benchmark.cc +++ b/cpp/src/arrow/util/bit-util-benchmark.cc @@ -119,20 +119,24 @@ static void BenchmarkBitmapReader(benchmark::State& state, int64_t nbytes) { benchmark::DoNotOptimize(total); } } - state.SetBytesProcessed(2 * int64_t(state.iterations()) * nbytes); + state.SetBytesProcessed(2LL * state.iterations() * nbytes); } +constexpr bool pattern[] = {false, false, false, true, true, true}; +static_assert( + (sizeof(pattern) / sizeof(pattern[0])) % 8 != 0, + "pattern must not be a multiple of 8, otherwise gcc can optimize with a memset"); + template static void BenchmarkBitmapWriter(benchmark::State& state, int64_t nbytes) { std::shared_ptr buffer = CreateRandomBuffer(nbytes); const int64_t num_bits = nbytes * 8; uint8_t* bitmap = buffer->mutable_data(); - const bool pattern[] = {false, false, false, true, true, true}; - while (state.KeepRunning()) { - int64_t pattern_index = 0; + for (auto _ : state) { BitmapWriterType writer(bitmap, 0, num_bits); + int64_t pattern_index = 0; for (int64_t i = 0; i < num_bits; i++) { if (pattern[pattern_index++]) { writer.Set(); @@ -147,7 +151,7 @@ static void BenchmarkBitmapWriter(benchmark::State& state, int64_t nbytes) { writer.Finish(); benchmark::ClobberMemory(); } - state.SetBytesProcessed(int64_t(state.iterations()) * nbytes); + state.SetBytesProcessed(state.iterations() * nbytes); } template @@ -156,8 +160,6 @@ static void BenchmarkGenerateBits(benchmark::State& state, int64_t nbytes) { const int64_t num_bits = nbytes * 8; uint8_t* bitmap = buffer->mutable_data(); - // pattern should be the same as in BenchmarkBitmapWriter - const bool pattern[] = {false, false, false, true, true, true}; while 
(state.KeepRunning()) { int64_t pattern_index = 0; @@ -171,26 +173,26 @@ static void BenchmarkGenerateBits(benchmark::State& state, int64_t nbytes) { GenerateBitsFunctorType()(bitmap, 0, num_bits, generate); benchmark::ClobberMemory(); } - state.SetBytesProcessed(2 * int64_t(state.iterations()) * nbytes); + state.SetBytesProcessed(state.iterations() * nbytes); } -static void BM_NaiveBitmapReader(benchmark::State& state) { +static void ReferenceNaiveBitmapReader(benchmark::State& state) { BenchmarkBitmapReader(state, state.range(0)); } -static void BM_BitmapReader(benchmark::State& state) { +static void RegressionBitmapReader(benchmark::State& state) { BenchmarkBitmapReader(state, state.range(0)); } -static void BM_NaiveBitmapWriter(benchmark::State& state) { +static void ReferenceNaiveBitmapWriter(benchmark::State& state) { BenchmarkBitmapWriter(state, state.range(0)); } -static void BM_BitmapWriter(benchmark::State& state) { +static void RegressionBitmapWriter(benchmark::State& state) { BenchmarkBitmapWriter(state, state.range(0)); } -static void BM_FirstTimeBitmapWriter(benchmark::State& state) { +static void RegressionFirstTimeBitmapWriter(benchmark::State& state) { BenchmarkBitmapWriter(state, state.range(0)); } @@ -208,72 +210,59 @@ struct GenerateBitsUnrolledFunctor { } }; -static void BM_GenerateBits(benchmark::State& state) { +static void RegressionGenerateBits(benchmark::State& state) { BenchmarkGenerateBits(state, state.range(0)); } -static void BM_GenerateBitsUnrolled(benchmark::State& state) { +static void RegressionGenerateBitsUnrolled(benchmark::State& state) { BenchmarkGenerateBits(state, state.range(0)); } -static void BM_CopyBitmap(benchmark::State& state) { // NOLINT non-const reference - const int kBufferSize = static_cast(state.range(0)); - std::shared_ptr buffer = CreateRandomBuffer(kBufferSize); +constexpr int64_t kBufferSize = 1024 * 8; + +template +static void RegressionCopyBitmap(benchmark::State& state) { // NOLINT non-const reference + 
const int64_t buffer_size = state.range(0); + const int64_t bits_size = buffer_size * 8; + std::shared_ptr buffer = CreateRandomBuffer(buffer_size); - const int num_bits = kBufferSize * 8; const uint8_t* src = buffer->data(); + const int64_t offset = Offset; + const int64_t length = bits_size - offset; std::shared_ptr copy; - while (state.KeepRunning()) { - ABORT_NOT_OK(CopyBitmap(default_memory_pool(), src, state.range(1), num_bits, ©)); + auto pool = default_memory_pool(); + ABORT_NOT_OK(AllocateEmptyBitmap(pool, length, ©)); + + for (auto _ : state) { + CopyBitmap(src, offset, length, copy->mutable_data(), 0, false); } - state.SetBytesProcessed(state.iterations() * kBufferSize * sizeof(int8_t)); + + state.SetBytesProcessed(state.iterations() * buffer_size); +} + +static void RegressionCopyBitmapWithoutOffset( + benchmark::State& state) { // NOLINT non-const reference + RegressionCopyBitmap<0>(state); } -BENCHMARK(BM_CopyBitmap) - ->Args({100000, 0}) - ->Args({1000000, 0}) - ->Args({100000, 4}) - ->Args({1000000, 4}) - ->MinTime(1.0) - ->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_NaiveBitmapReader) - ->Args({1000000}) - ->MinTime(5.0) - ->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_BitmapReader)->Args({1000000})->MinTime(5.0)->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_NaiveBitmapWriter) - ->Args({100000}) - ->Repetitions(2) - ->MinTime(1.0) - ->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_BitmapWriter) - ->Args({100000}) - ->Repetitions(2) - ->MinTime(1.0) - ->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_FirstTimeBitmapWriter) - ->Args({100000}) - ->Repetitions(2) - ->MinTime(1.0) - ->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_GenerateBits) - ->Args({100000}) - ->Repetitions(2) - ->MinTime(1.0) - ->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_GenerateBitsUnrolled) - ->Args({100000}) - ->Repetitions(2) - ->MinTime(1.0) - ->Unit(benchmark::kMicrosecond); +// Trigger the slow path where the buffer is not byte aligned. 
+static void RegressionCopyBitmapWithOffset( + benchmark::State& state) { // NOLINT non-const reference + RegressionCopyBitmap<4>(state); +} + +BENCHMARK(RegressionCopyBitmapWithoutOffset)->Arg(kBufferSize); +BENCHMARK(RegressionCopyBitmapWithOffset)->Arg(kBufferSize); + +BENCHMARK(ReferenceNaiveBitmapReader)->Arg(kBufferSize); +BENCHMARK(RegressionBitmapReader)->Arg(kBufferSize); +BENCHMARK(ReferenceNaiveBitmapWriter)->Arg(kBufferSize); +BENCHMARK(RegressionBitmapWriter)->Arg(kBufferSize); + +BENCHMARK(RegressionFirstTimeBitmapWriter)->Arg(kBufferSize); +BENCHMARK(RegressionGenerateBits)->Arg(kBufferSize); +BENCHMARK(RegressionGenerateBitsUnrolled)->Arg(kBufferSize); } // namespace BitUtil } // namespace arrow From f207974c0b2a8c9efee6654971c48755fe376faf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Wed, 8 May 2019 16:08:44 -0400 Subject: [PATCH 03/14] Refactor CompareFilter benchmarks --- cpp/src/arrow/compute/kernels/filter-benchmark.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/filter-benchmark.cc b/cpp/src/arrow/compute/kernels/filter-benchmark.cc index 3826e261a7a..1bb9255e1ed 100644 --- a/cpp/src/arrow/compute/kernels/filter-benchmark.cc +++ b/cpp/src/arrow/compute/kernels/filter-benchmark.cc @@ -29,7 +29,7 @@ namespace arrow { namespace compute { -static void BenchCompareKernel(benchmark::State& state) { +static void RegressionCompareArrayScalarKernel(benchmark::State& state) { const int64_t memory_size = state.range(0) / 4; const int64_t array_size = memory_size / sizeof(int64_t); const double null_percent = static_cast(state.range(1)) / 100.0; @@ -37,7 +37,7 @@ static void BenchCompareKernel(benchmark::State& state) { auto array = std::static_pointer_cast>( rand.Int64(array_size, -100, 100, null_percent)); - CompareOptions ge(GREATER_EQUAL); + CompareOptions ge{GREATER_EQUAL}; FunctionContext ctx; for (auto _ : state) { @@ -51,7 +51,7 @@ static void 
BenchCompareKernel(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * array_size * sizeof(int64_t)); } -BENCHMARK(BenchCompareKernel)->Apply(BenchmarkSetArgs); +BENCHMARK(RegressionCompareArrayScalarKernel)->Apply(RegressionSetArgs); } // namespace compute } // namespace arrow From bea76ed0ee009a0abd2a0986a7fa32ffd91b721e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Thu, 9 May 2019 08:40:51 -0400 Subject: [PATCH 04/14] Refactor csv benchmarks --- cpp/src/arrow/compute/benchmark-util.h | 36 +++++----- cpp/src/arrow/csv/converter-benchmark.cc | 14 ++-- cpp/src/arrow/csv/parser-benchmark.cc | 90 +++++++++++------------- 3 files changed, 67 insertions(+), 73 deletions(-) diff --git a/cpp/src/arrow/compute/benchmark-util.h b/cpp/src/arrow/compute/benchmark-util.h index 865da6671e3..ee9cb9504a3 100644 --- a/cpp/src/arrow/compute/benchmark-util.h +++ b/cpp/src/arrow/compute/benchmark-util.h @@ -32,40 +32,42 @@ static const int64_t kL1Size = cpu_info->CacheSize(CpuInfo::L1_CACHE); static const int64_t kL2Size = cpu_info->CacheSize(CpuInfo::L2_CACHE); static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::L3_CACHE); static const int64_t kCantFitInL3Size = kL3Size * 4; +static const std::vector kMemorySizes = {kL1Size, kL2Size, kL3Size, + kCantFitInL3Size}; template struct BenchmarkArgsType; +// Pattern matching that extracts the vector element type of Benchmark::Args() template struct BenchmarkArgsType&)> { using type = Values; }; -void BenchmarkSetArgs(benchmark::internal::Benchmark* bench) { - // Benchmark changed its parameter type between releases from - // int to int64_t. As it doesn't have version macros, we need - // to apply C++ template magic. - using ArgsType = - typename BenchmarkArgsType::type; +// Benchmark changed its parameter type between releases from +// int to int64_t. As it doesn't have version macros, we need +// to apply C++ template magic. 
+using ArgsType = + typename BenchmarkArgsType::type; + +void BenchmarkSetArgsWithSizes(benchmark::internal::Benchmark* bench, + const std::vector& sizes = kMemorySizes) { bench->Unit(benchmark::kMicrosecond); - for (auto size : {kL1Size, kL2Size, kL3Size, kCantFitInL3Size}) + for (auto size : sizes) for (auto nulls : std::vector({0, 1, 10, 50})) bench->Args({static_cast(size), nulls}); } -void RegressionSetArgs(benchmark::internal::Benchmark* bench) { - // Benchmark changed its parameter type between releases from - // int to int64_t. As it doesn't have version macros, we need - // to apply C++ template magic. - using ArgsType = - typename BenchmarkArgsType::type; - bench->Unit(benchmark::kMicrosecond); +void BenchmarkSetArgs(benchmark::internal::Benchmark* bench) { + BenchmarkSetArgsWithSizes(bench, kMemorySizes); +} - // Regressions should only bench L1 data for better stability - for (auto nulls : std::vector({0, 1, 10, 50})) - bench->Args({static_cast(kL1Size), nulls}); +void RegressionSetArgs(benchmark::internal::Benchmark* bench) { + // Regression do not need to account for cache hierarchy, thus optimize for + // the best case. 
+ BenchmarkSetArgsWithSizes(bench, {kL1Size}); } } // namespace compute diff --git a/cpp/src/arrow/csv/converter-benchmark.cc b/cpp/src/arrow/csv/converter-benchmark.cc index c43fce66b28..f58f47d0456 100644 --- a/cpp/src/arrow/csv/converter-benchmark.cc +++ b/cpp/src/arrow/csv/converter-benchmark.cc @@ -74,24 +74,26 @@ static void BenchmarkConversion(benchmark::State& state, // NOLINT non-const re state.SetItemsProcessed(state.iterations() * parser.num_rows()); } -static void BM_Int64Conversion(benchmark::State& state) { // NOLINT non-const reference - const int32_t num_rows = 10000; +constexpr size_t num_rows = 10000; + +static void RegressionInt64Conversion( + benchmark::State& state) { // NOLINT non-const reference auto parser = BuildInt64Data(num_rows); auto options = ConvertOptions::Defaults(); BenchmarkConversion(state, *parser, int64(), options); } -static void BM_FloatConversion(benchmark::State& state) { // NOLINT non-const reference - const int32_t num_rows = 10000; +static void RegressionFloatConversion( + benchmark::State& state) { // NOLINT non-const reference auto parser = BuildFloatData(num_rows); auto options = ConvertOptions::Defaults(); BenchmarkConversion(state, *parser, float64(), options); } -BENCHMARK(BM_Int64Conversion)->Repetitions(3); -BENCHMARK(BM_FloatConversion)->Repetitions(3); +BENCHMARK(RegressionInt64Conversion); +BENCHMARK(RegressionFloatConversion); } // namespace csv } // namespace arrow diff --git a/cpp/src/arrow/csv/parser-benchmark.cc b/cpp/src/arrow/csv/parser-benchmark.cc index 8dcb06bd3cc..84b955eb904 100644 --- a/cpp/src/arrow/csv/parser-benchmark.cc +++ b/cpp/src/arrow/csv/parser-benchmark.cc @@ -28,20 +28,16 @@ namespace arrow { namespace csv { -static std::string BuildQuotedData(int32_t num_rows = 10000) { - std::string one_row = "abc,\"d,f\",12.34,\n"; - std::stringstream ss; - for (int32_t i = 0; i < num_rows; ++i) { - ss << one_row; - } - return ss.str(); -} +// Can't have static str. 
+const char* one_row = "abc,\"d,f\",12.34,\n"; +const char* one_row_escaped = "abc,d\\,f,12.34,\n"; + +size_t num_rows = (1024 * 8) / strlen(one_row); -static std::string BuildEscapedData(int32_t num_rows = 10000) { - std::string one_row = "abc,d\\,f,12.34,\n"; +static std::string BuildCsvData(const std::string& row, size_t repeat) { std::stringstream ss; - for (int32_t i = 0; i < num_rows; ++i) { - ss << one_row; + for (size_t i = 0; i < repeat; ++i) { + ss << row; } return ss.str(); } @@ -49,23 +45,20 @@ static std::string BuildEscapedData(int32_t num_rows = 10000) { static void BenchmarkCSVChunking(benchmark::State& state, // NOLINT non-const reference const std::string& csv, ParseOptions options) { Chunker chunker(options); + const uint32_t csv_size = static_cast(csv.size()); while (state.KeepRunning()) { - uint32_t chunk_size; - ABORT_NOT_OK( - chunker.Process(csv.data(), static_cast(csv.size()), &chunk_size)); - if (chunk_size != csv.size()) { - std::cerr << "Parsing incomplete\n"; - std::abort(); - } + uint32_t chunk_size = 0; + ABORT_NOT_OK(chunker.Process(csv.data(), csv_size, &chunk_size)); + benchmark::DoNotOptimize(chunk_size); } - state.SetBytesProcessed(state.iterations() * csv.size()); + + state.SetBytesProcessed(state.iterations() * csv_size); } -static void BM_ChunkCSVQuotedBlock( +static void RegressionChunkCSVQuotedBlock( benchmark::State& state) { // NOLINT non-const reference - const int32_t num_rows = 5000; - auto csv = BuildQuotedData(num_rows); + auto csv = BuildCsvData(one_row, num_rows); auto options = ParseOptions::Defaults(); options.quoting = true; options.escaping = false; @@ -74,10 +67,9 @@ static void BM_ChunkCSVQuotedBlock( BenchmarkCSVChunking(state, csv, options); } -static void BM_ChunkCSVEscapedBlock( +static void RegressionChunkCSVEscapedBlock( benchmark::State& state) { // NOLINT non-const reference - const int32_t num_rows = 5000; - auto csv = BuildEscapedData(num_rows); + auto csv = BuildCsvData(one_row_escaped, num_rows); 
auto options = ParseOptions::Defaults(); options.quoting = false; options.escaping = true; @@ -86,31 +78,30 @@ static void BM_ChunkCSVEscapedBlock( BenchmarkCSVChunking(state, csv, options); } -static void BM_ChunkCSVNoNewlinesBlock( +static void RegressionChunkCSVNoNewlinesBlock( benchmark::State& state) { // NOLINT non-const reference - const int32_t num_rows = 5000; - auto csv = BuildEscapedData(num_rows); + auto csv = BuildCsvData(one_row_escaped, num_rows); auto options = ParseOptions::Defaults(); options.quoting = true; options.escaping = false; options.newlines_in_values = false; BenchmarkCSVChunking(state, csv, options); + // Provides better regression stability with timings rather than bogus + // bandwidth. + state.SetBytesProcessed(0); } static void BenchmarkCSVParsing(benchmark::State& state, // NOLINT non-const reference - const std::string& csv, int32_t num_rows, + const std::string& csv, int32_t rows, ParseOptions options) { - BlockParser parser(options, -1, num_rows + 1); + BlockParser parser(options, -1, rows + 1); + const uint32_t csv_size = static_cast(csv.size()); while (state.KeepRunning()) { - uint32_t parsed_size; - ABORT_NOT_OK( - parser.Parse(csv.data(), static_cast(csv.size()), &parsed_size)); - if (parsed_size != csv.size() || parser.num_rows() != num_rows) { - std::cerr << "Parsing incomplete\n"; - std::abort(); - } + uint32_t parsed_size = 0; + ABORT_NOT_OK(parser.Parse(csv.data(), csv_size, &parsed_size)); + // Include performance of visiting the parsed values, as that might // vary depending on the parser's internal data structures. 
bool dummy_quoted = false; @@ -126,13 +117,13 @@ static void BenchmarkCSVParsing(benchmark::State& state, // NOLINT non-const re benchmark::DoNotOptimize(dummy_quoted); } } - state.SetBytesProcessed(state.iterations() * csv.size()); + + state.SetBytesProcessed(state.iterations() * csv_size); } -static void BM_ParseCSVQuotedBlock( +static void RegressionParseCSVQuotedBlock( benchmark::State& state) { // NOLINT non-const reference - const int32_t num_rows = 5000; - auto csv = BuildQuotedData(num_rows); + auto csv = BuildCsvData(one_row, num_rows); auto options = ParseOptions::Defaults(); options.quoting = true; options.escaping = false; @@ -140,10 +131,9 @@ static void BM_ParseCSVQuotedBlock( BenchmarkCSVParsing(state, csv, num_rows, options); } -static void BM_ParseCSVEscapedBlock( +static void RegressionParseCSVEscapedBlock( benchmark::State& state) { // NOLINT non-const reference - const int32_t num_rows = 5000; - auto csv = BuildEscapedData(num_rows); + auto csv = BuildCsvData(one_row_escaped, num_rows); auto options = ParseOptions::Defaults(); options.quoting = false; options.escaping = true; @@ -151,11 +141,11 @@ static void BM_ParseCSVEscapedBlock( BenchmarkCSVParsing(state, csv, num_rows, options); } -BENCHMARK(BM_ChunkCSVQuotedBlock)->Repetitions(3)->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_ChunkCSVEscapedBlock)->Repetitions(3)->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_ChunkCSVNoNewlinesBlock)->Repetitions(3)->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_ParseCSVQuotedBlock)->Repetitions(3)->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_ParseCSVEscapedBlock)->Repetitions(3)->Unit(benchmark::kMicrosecond); +BENCHMARK(RegressionChunkCSVQuotedBlock); +BENCHMARK(RegressionChunkCSVEscapedBlock); +BENCHMARK(RegressionChunkCSVNoNewlinesBlock); +BENCHMARK(RegressionParseCSVQuotedBlock); +BENCHMARK(RegressionParseCSVEscapedBlock); } // namespace csv } // namespace arrow From a7b4f5f50d1e478565c572c277b6ebce2ff632fa Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Thu, 9 May 2019 08:41:27 -0400 Subject: [PATCH 05/14] Improve archery benchmark support - Add '\n' do diff json output adhering to jsonlines - Add support for items_per_second metrics - Add `--pdb` option to drop a pdb shell on uncaught exception --- dev/archery/archery/benchmark/google.py | 21 +++++++++++++++------ dev/archery/archery/cli.py | 10 ++++++++-- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py index bd2793eb4e8..c783e4e6ce2 100644 --- a/dev/archery/archery/benchmark/google.py +++ b/dev/archery/archery/benchmark/google.py @@ -87,13 +87,14 @@ class GoogleBenchmarkObservation: """ def __init__(self, name, real_time, cpu_time, time_unit, size=None, - bytes_per_second=None, **kwargs): + bytes_per_second=None, items_per_second=None, **kwargs): self._name = name self.real_time = real_time self.cpu_time = cpu_time self.time_unit = time_unit self.size = size self.bytes_per_second = bytes_per_second + self.items_per_second = items_per_second @property def is_agg(self): @@ -118,11 +119,21 @@ def time(self): @property def value(self): """ Return the benchmark value.""" - return self.bytes_per_second if self.size else self.time + if self.bytes_per_second: + return self.bytes_per_second + elif self.items_per_second: + return self.items_per_second + else: + return self.time @property def unit(self): - return "bytes_per_second" if self.size else self.time_unit + if self.bytes_per_second: + return "bytes_per_second" + elif self.items_per_second: + return "items_per_second" + else: + return self.time_unit def __repr__(self): return f"{self.value}" @@ -147,9 +158,7 @@ def __init__(self, name, runs): _, runs = partition(lambda b: b.is_agg, runs) self.runs = sorted(runs, key=lambda b: b.value) unit = self.runs[0].unit - # If `size` is found in the json dict, then the benchmark is reported - # in bytes per second - less_is_better = 
self.runs[0].size is None + less_is_better = not unit.endswith("per_second") values = [b.value for b in self.runs] super().__init__(name, unit, less_is_better, values) diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 0178d58a03f..11f8dfe0179 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -37,10 +37,12 @@ @click.group() @click.option("--debug", type=bool, is_flag=True, default=False, help="Increase logging with debugging output.") +@click.option("--pdb", type=bool, is_flag=True, default=False, + help="Invoke pdb on uncaught exception.") @click.option("-q", "--quiet", type=bool, is_flag=True, default=False, help="Silence executed commands.") @click.pass_context -def archery(ctx, debug, quiet): +def archery(ctx, debug, pdb, quiet): """ Apache Arrow developer utilities. See sub-commands help with `archery --help`. @@ -53,6 +55,10 @@ def archery(ctx, debug, quiet): if debug: logger.setLevel(logging.DEBUG) + if pdb: + import pdb + sys.excepthook = lambda t, v, e: pdb.pm() + def validate_arrow_sources(ctx, param, src): """ Ensure a directory contains Arrow cpp sources. 
""" @@ -357,7 +363,7 @@ def benchmark_diff(ctx, src, preserve, suite_filter, benchmark_filter, for comparator in runner_comp.comparisons: regressions += comparator.regression json.dump(comparator, output, cls=JsonEncoder) - output.write('\n') + output.write("\n") sys.exit(regressions) From e0876474b7eada231cac303e9b9c05a2dee1a73a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Thu, 9 May 2019 10:58:59 -0400 Subject: [PATCH 06/14] Refactor JSON benchmarks - Use a single data generator - Fix multithread input size - Exclude multithread from regressions --- cpp/src/arrow/json/parser-benchmark.cc | 141 +++++++++++-------------- 1 file changed, 63 insertions(+), 78 deletions(-) diff --git a/cpp/src/arrow/json/parser-benchmark.cc b/cpp/src/arrow/json/parser-benchmark.cc index b186f069612..5025c29c49e 100644 --- a/cpp/src/arrow/json/parser-benchmark.cc +++ b/cpp/src/arrow/json/parser-benchmark.cc @@ -30,57 +30,64 @@ namespace arrow { namespace json { -static void BenchmarkJSONChunking(benchmark::State& state, // NOLINT non-const reference +std::shared_ptr TestSchema() { + return schema({field("int", int32()), field("str", utf8())}); +} + +constexpr int seed = 0x432432; + +std::string TestJsonData(int num_rows, bool pretty = false) { + std::default_random_engine engine(seed); + std::string json; + for (int i = 0; i < num_rows; ++i) { + StringBuffer sb; + Writer writer(sb); + ABORT_NOT_OK(Generate(TestSchema(), engine, &writer)); + json += pretty ? 
PrettyPrint(sb.GetString()) : sb.GetString(); + json += "\n"; + } + + return json; +} + +static void BenchmarkJSONChunking(benchmark::State& state, const std::shared_ptr& json, - ParseOptions options) { + ParseOptions options) { // NOLINT non-const reference auto chunker = Chunker::Make(options); + for (auto _ : state) { std::shared_ptr chunked, partial; ABORT_NOT_OK(chunker->Process(json, &chunked, &partial)); } + state.SetBytesProcessed(state.iterations() * json->size()); } -static void BM_ChunkJSONPrettyPrinted( +static void RegressionChunkJSONPrettyPrinted( benchmark::State& state) { // NOLINT non-const reference const int32_t num_rows = 5000; + auto options = ParseOptions::Defaults(); options.newlines_in_values = true; - options.explicit_schema = schema({field("int", int32()), field("str", utf8())}); - std::default_random_engine engine; - std::string json; - for (int i = 0; i < num_rows; ++i) { - StringBuffer sb; - Writer writer(sb); - ABORT_NOT_OK(Generate(options.explicit_schema, engine, &writer)); - json += PrettyPrint(sb.GetString()); - json += "\n"; - } + options.explicit_schema = TestSchema(); + + auto json = TestJsonData(num_rows, /* pretty */ true); BenchmarkJSONChunking(state, std::make_shared(json), options); } -BENCHMARK(BM_ChunkJSONPrettyPrinted)->MinTime(1.0)->Unit(benchmark::kMicrosecond); - -static void BM_ChunkJSONLineDelimited( +static void RegressionChunkJSONLineDelimited( benchmark::State& state) { // NOLINT non-const reference const int32_t num_rows = 5000; + auto options = ParseOptions::Defaults(); options.newlines_in_values = false; - options.explicit_schema = schema({field("int", int32()), field("str", utf8())}); - std::default_random_engine engine; - std::string json; - for (int i = 0; i < num_rows; ++i) { - StringBuffer sb; - Writer writer(sb); - ABORT_NOT_OK(Generate(options.explicit_schema, engine, &writer)); - json += sb.GetString(); - json += "\n"; - } + options.explicit_schema = TestSchema(); + + auto json = 
TestJsonData(num_rows); BenchmarkJSONChunking(state, std::make_shared(json), options); + state.SetBytesProcessed(0); } -BENCHMARK(BM_ChunkJSONLineDelimited)->MinTime(1.0)->Unit(benchmark::kMicrosecond); - static void BenchmarkJSONParsing(benchmark::State& state, // NOLINT non-const reference const std::shared_ptr& json, int32_t num_rows, ParseOptions options) { @@ -88,38 +95,24 @@ static void BenchmarkJSONParsing(benchmark::State& state, // NOLINT non-const r std::unique_ptr parser; ABORT_NOT_OK(BlockParser::Make(options, &parser)); ABORT_NOT_OK(parser->Parse(json)); - if (parser->num_rows() != num_rows) { - std::cerr << "Parsing incomplete\n"; - std::abort(); - } + std::shared_ptr parsed; ABORT_NOT_OK(parser->Finish(&parsed)); } state.SetBytesProcessed(state.iterations() * json->size()); } -static void BM_ParseJSONBlockWithSchema( +static void RegressionParseJSONBlockWithSchema( benchmark::State& state) { // NOLINT non-const reference const int32_t num_rows = 5000; auto options = ParseOptions::Defaults(); options.unexpected_field_behavior = UnexpectedFieldBehavior::Error; - options.explicit_schema = schema({field("int", int32()), field("str", utf8())}); - std::default_random_engine engine; - std::string json; - for (int i = 0; i < num_rows; ++i) { - StringBuffer sb; - Writer writer(sb); - ABORT_NOT_OK(Generate(options.explicit_schema, engine, &writer)); - json += sb.GetString(); - json += "\n"; - } + options.explicit_schema = TestSchema(); + + auto json = TestJsonData(num_rows); BenchmarkJSONParsing(state, std::make_shared(json), num_rows, options); } -BENCHMARK(BM_ParseJSONBlockWithSchema)->MinTime(1.0)->Unit(benchmark::kMicrosecond); - -std::shared_ptr tables[2]; - static void BenchmarkJSONReading(benchmark::State& state, // NOLINT non-const reference const std::string& json, int32_t num_rows, ReadOptions read_options, ParseOptions parse_options) { @@ -133,49 +126,41 @@ static void BenchmarkJSONReading(benchmark::State& state, // NOLINT non-const r 
std::shared_ptr
table; ABORT_NOT_OK(reader->Read(&table)); - - if (table->num_rows() != num_rows) { - std::cerr << "Parsing incomplete\n"; - std::abort(); - } - - tables[read_options.use_threads] = table; } - state.SetBytesProcessed(state.iterations() * json.size()); - if (tables[false] && tables[true]) { - AssertTablesEqual(*tables[false], *tables[true]); - } + state.SetBytesProcessed(state.iterations() * json.size()); } -static void BM_ReadJSONBlockWithSchema( - benchmark::State& state) { // NOLINT non-const reference - const int32_t num_rows = 50000; +static void BenchmarkReadJSONBlockWithSchema( + benchmark::State& state, bool use_threads) { // NOLINT non-const reference + const int32_t num_rows = 500000; auto read_options = ReadOptions::Defaults(); - read_options.use_threads = state.range(0); + read_options.use_threads = use_threads; auto parse_options = ParseOptions::Defaults(); parse_options.unexpected_field_behavior = UnexpectedFieldBehavior::Error; - parse_options.explicit_schema = schema({field("int", int32()), field("str", utf8())}); + parse_options.explicit_schema = TestSchema(); - std::default_random_engine engine; - std::string json; - for (int i = 0; i < num_rows; ++i) { - StringBuffer sb; - Writer writer(sb); - ABORT_NOT_OK(Generate(parse_options.explicit_schema, engine, &writer)); - json += sb.GetString(); - json += "\n"; - } + auto json = TestJsonData(num_rows); BenchmarkJSONReading(state, json, num_rows, read_options, parse_options); } -BENCHMARK(BM_ReadJSONBlockWithSchema) - ->MinTime(1.0) - ->Unit(benchmark::kMicrosecond) - ->Arg(true) - ->Arg(false) - ->UseRealTime(); +static void RegressionReadJSONBlockWithSchemaSingleThread( + benchmark::State& state) { // NOLINT non-const reference + BenchmarkReadJSONBlockWithSchema(state, false); +} + +static void ReferenceReadJSONBlockWithSchemaMultiThread( + benchmark::State& state) { // NOLINT non-const reference + BenchmarkReadJSONBlockWithSchema(state, true); +} + +BENCHMARK(RegressionChunkJSONPrettyPrinted); 
+BENCHMARK(RegressionChunkJSONLineDelimited); +BENCHMARK(RegressionParseJSONBlockWithSchema); + +BENCHMARK(RegressionReadJSONBlockWithSchemaSingleThread); +BENCHMARK(ReferenceReadJSONBlockWithSchemaMultiThread)->UseRealTime(); } // namespace json } // namespace arrow From ffe723f054bbd23d8aa3edfb08186cacc7237363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Tue, 21 May 2019 11:27:36 -0400 Subject: [PATCH 07/14] Add `benchmark list` sub-command. --- dev/archery/archery/benchmark/runner.py | 7 +++++ dev/archery/archery/cli.py | 35 ++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index dbbb3f5713c..099fa68c497 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -146,6 +146,13 @@ def suite(self, name, suite_bin): benchmarks = GoogleBenchmark.from_json(results.get("benchmarks")) return BenchmarkSuite(name, benchmarks) + @property + def list(self): + for suite_name, suite_bin in self.suites_binaries.items(): + suite_cmd = GoogleBenchmarkCommand(suite_bin) + for benchmark_name in suite_cmd.list_benchmarks(): + yield f"{suite_name}.{benchmark_name}" + @property def suites(self): """ Returns all suite for a runner. 
""" diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 11f8dfe0179..e9221fe14f7 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -173,6 +173,39 @@ def benchmark(ctx): pass +@benchmark.command(name="list", short_help="List benchmark suite") +@click.option("--src", metavar="", show_default=True, + default=ArrowSources.find(), + callback=validate_arrow_sources, + help="Specify Arrow source directory") +@click.option("--preserve", type=bool, default=False, show_default=True, + is_flag=True, help="Preserve workspace for investigation.") +@click.option("--output", metavar="", + type=click.File("w", encoding="utf8"), default="-", + help="Capture output result into file.") +@click.option("--cmake-extras", type=str, multiple=True, + help="Extra flags/options to pass to cmake invocation. " + "Can be stacked") +@click.argument("baseline", metavar="[]", default="WORKSPACE", + required=False) +@click.pass_context +def benchmark_list(ctx, src, preserve, output, cmake_extras, baseline): + """ List benchmark suite. + """ + with tmpdir(preserve) as root: + logger.debug(f"Running benchmark {baseline}") + + conf = CppConfiguration( + build_type="release", with_tests=True, with_benchmarks=True, + with_python=False, cmake_extras=cmake_extras) + + runner_base = BenchmarkRunner.from_rev_or_path( + src, root, baseline, conf) + + for b in runner_base.list: + print(b, file=output) + + @benchmark.command(name="run", short_help="Run benchmark suite") @click.option("--src", metavar="", show_default=True, default=ArrowSources.find(), @@ -191,7 +224,7 @@ def benchmark(ctx): @click.option("--cmake-extras", type=str, multiple=True, help="Extra flags/options to pass to cmake invocation. 
" "Can be stacked") -@click.argument("baseline", metavar="[]]", default="master", +@click.argument("baseline", metavar="[]", default="WORKSPACE", required=False) @click.pass_context def benchmark_run(ctx, src, preserve, suite_filter, benchmark_filter, From 908f05a5b6f8d019dd4ed93010379a8b092c83d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Wed, 22 May 2019 08:42:21 -0400 Subject: [PATCH 08/14] Change default repetitions to 10 instead of 20 --- dev/archery/archery/benchmark/google.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py index c783e4e6ce2..8ef25b3de28 100644 --- a/dev/archery/archery/benchmark/google.py +++ b/dev/archery/archery/benchmark/google.py @@ -30,6 +30,9 @@ def partition(pred, iterable): return list(filter(pred, t1)), list(filterfalse(pred, t2)) +DEFAULT_REPETITIONS = 10 + + class GoogleBenchmarkCommand(Command): """ Run a google benchmark binary. @@ -49,9 +52,9 @@ def list_benchmarks(self): stderr=subprocess.PIPE) return str.splitlines(result.stdout.decode("utf-8")) - def results(self): + def results(self, repetitions=DEFAULT_REPETITIONS): with NamedTemporaryFile() as out: - argv = ["--benchmark_repetitions=20", + argv = [f"--benchmark_repetitions={repetitions}", f"--benchmark_out={out.name}", "--benchmark_out_format=json"] From 5cebe2c233b5af8db1497870b1e0f23910147d10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Wed, 22 May 2019 13:46:08 -0400 Subject: [PATCH 09/14] Normalize benchmarks - Favor external repetitions over manual repetitions and mintime when possible. - Add cmake ARROW_BUILD_BENCHMARKS_REFERENCE to toggle reference benchmarks. - Remove default benchmark filter of `^Regression`. 
- Remove Regression prefix from benchmark --- cpp/CMakeLists.txt | 3 ++ cpp/cmake_modules/DefineOptions.cmake | 6 +++ cpp/src/arrow/builder-benchmark.cc | 48 +++++++++---------- cpp/src/arrow/compute/compute-benchmark.cc | 21 +++----- .../compute/kernels/aggregate-benchmark.cc | 26 +++++----- .../arrow/compute/kernels/filter-benchmark.cc | 4 +- cpp/src/arrow/csv/converter-benchmark.cc | 8 ++-- cpp/src/arrow/csv/parser-benchmark.cc | 20 ++++---- cpp/src/arrow/gpu/cuda-benchmark.cc | 2 - cpp/src/arrow/io/file-benchmark.cc | 32 +++---------- cpp/src/arrow/io/memory-benchmark.cc | 2 + cpp/src/arrow/ipc/read-write-benchmark.cc | 13 +---- cpp/src/arrow/json/parser-benchmark.cc | 16 +++---- cpp/src/arrow/util/bit-util-benchmark.cc | 39 +++++++-------- cpp/src/arrow/util/compression-benchmark.cc | 34 ++++--------- cpp/src/arrow/util/decimal-benchmark.cc | 2 +- cpp/src/arrow/util/hashing-benchmark.cc | 13 ++--- cpp/src/arrow/util/int-util-benchmark.cc | 11 ++--- cpp/src/arrow/util/lazy-benchmark.cc | 16 ++++--- cpp/src/arrow/util/machine-benchmark.cc | 6 ++- cpp/src/arrow/util/thread-pool-benchmark.cc | 26 ++++------ cpp/src/arrow/util/trie-benchmark.cc | 10 ++-- cpp/src/arrow/util/utf8-util-benchmark.cc | 21 ++++---- dev/archery/archery/cli.py | 2 +- 24 files changed, 162 insertions(+), 219 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 4c70a388faf..21043fc9870 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -269,6 +269,9 @@ else() add_custom_target(all-benchmarks) add_custom_target(benchmark ctest -L benchmark) add_dependencies(benchmark all-benchmarks) + if(ARROW_BUILD_BENCHMARKS_REFERENCE) + add_definitions(-DARROW_WITH_BENCHMARKS_REFERENCE) + endif() endif() if(NOT ARROW_BUILD_EXAMPLES) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 8ea7346cc98..54844927bb1 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -96,6 +96,12 @@ 
if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") define_option(ARROW_BUILD_BENCHMARKS "Build the Arrow micro benchmarks, default OFF" OFF) + # Reference benchmarks are used to compare to naive implementation, or + # discover various hardware limits. + define_option(ARROW_BUILD_BENCHMARKS_REFERENCE + "Build the Arrow micro reference benchmarks, default OFF." + OFF) + define_option_string(ARROW_TEST_LINKAGE "Linkage of Arrow libraries with unit tests executables." "shared" diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc index b3ddf46030e..c5e8093d72d 100644 --- a/cpp/src/arrow/builder-benchmark.cc +++ b/cpp/src/arrow/builder-benchmark.cc @@ -70,7 +70,7 @@ static void ReferenceBuildVectorNoNulls( state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void RegressionBuildPrimitiveArrayNoNulls( +static void BuildPrimitiveArrayNoNulls( benchmark::State& state) { // NOLINT non-const reference for (auto _ : state) { Int64Builder builder; @@ -86,7 +86,7 @@ static void RegressionBuildPrimitiveArrayNoNulls( state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void RegressionBuildAdaptiveIntNoNulls( +static void BuildAdaptiveIntNoNulls( benchmark::State& state) { // NOLINT non-const reference for (auto _ : state) { AdaptiveIntBuilder builder; @@ -102,7 +102,7 @@ static void RegressionBuildAdaptiveIntNoNulls( state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void RegressionBuildAdaptiveIntNoNullsScalarAppend( +static void BuildAdaptiveIntNoNullsScalarAppend( benchmark::State& state) { // NOLINT non-const reference for (auto _ : state) { AdaptiveIntBuilder builder; @@ -120,7 +120,7 @@ static void RegressionBuildAdaptiveIntNoNullsScalarAppend( state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void RegressionBuildBooleanArrayNoNulls( +static void BuildBooleanArrayNoNulls( benchmark::State& state) { // NOLINT non-const reference size_t n_bytes = 
kData.size() * sizeof(ValueType); @@ -140,7 +140,7 @@ static void RegressionBuildBooleanArrayNoNulls( state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void RegressionBuildBinaryArray( +static void BuildBinaryArray( benchmark::State& state) { // NOLINT non-const reference for (auto _ : state) { BinaryBuilder builder; @@ -156,7 +156,7 @@ static void RegressionBuildBinaryArray( state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void RegressionBuildChunkedBinaryArray( +static void BuildChunkedBinaryArray( benchmark::State& state) { // NOLINT non-const reference // 1MB chunks const int32_t kChunkSize = 1 << 20; @@ -175,7 +175,7 @@ static void RegressionBuildChunkedBinaryArray( state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void RegressionBuildFixedSizeBinaryArray( +static void BuildFixedSizeBinaryArray( benchmark::State& state) { // NOLINT non-const reference auto type = fixed_size_binary(kBinaryView.size()); @@ -306,25 +306,25 @@ static void BenchmarkScalarDictionaryArray( state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void RegressionBuildInt64DictionaryArrayRandom( +static void BuildInt64DictionaryArrayRandom( benchmark::State& state) { // NOLINT non-const reference const auto fodder = MakeRandomIntDictFodder(); BenchmarkScalarDictionaryArray>(state, fodder); } -static void RegressionBuildInt64DictionaryArraySequential( +static void BuildInt64DictionaryArraySequential( benchmark::State& state) { // NOLINT non-const reference const auto fodder = MakeSequentialIntDictFodder(); BenchmarkScalarDictionaryArray>(state, fodder); } -static void RegressionBuildInt64DictionaryArraySimilar( +static void BuildInt64DictionaryArraySimilar( benchmark::State& state) { // NOLINT non-const reference const auto fodder = MakeSimilarIntDictFodder(); BenchmarkScalarDictionaryArray>(state, fodder); } -static void RegressionBuildStringDictionaryArray( +static void BuildStringDictionaryArray( 
benchmark::State& state) { // NOLINT non-const reference const auto fodder = MakeStringDictFodder(); auto fodder_size = @@ -347,7 +347,7 @@ static void RegressionBuildStringDictionaryArray( state.SetBytesProcessed(state.iterations() * fodder_size * kFinalSize); } -static void BM_ArrayDataConstructDestruct( +static void ArrayDataConstructDestruct( benchmark::State& state) { // NOLINT non-const reference std::vector> arrays; @@ -370,21 +370,21 @@ static void BM_ArrayDataConstructDestruct( BENCHMARK(ReferenceBuildVectorNoNulls); -BENCHMARK(RegressionBuildBooleanArrayNoNulls); +BENCHMARK(BuildBooleanArrayNoNulls); -BENCHMARK(RegressionBuildPrimitiveArrayNoNulls); -BENCHMARK(RegressionBuildAdaptiveIntNoNulls); -BENCHMARK(RegressionBuildAdaptiveIntNoNullsScalarAppend); +BENCHMARK(BuildPrimitiveArrayNoNulls); +BENCHMARK(BuildAdaptiveIntNoNulls); +BENCHMARK(BuildAdaptiveIntNoNullsScalarAppend); -BENCHMARK(RegressionBuildBinaryArray); -BENCHMARK(RegressionBuildChunkedBinaryArray); -BENCHMARK(RegressionBuildFixedSizeBinaryArray); +BENCHMARK(BuildBinaryArray); +BENCHMARK(BuildChunkedBinaryArray); +BENCHMARK(BuildFixedSizeBinaryArray); -BENCHMARK(RegressionBuildInt64DictionaryArrayRandom); -BENCHMARK(RegressionBuildInt64DictionaryArraySequential); -BENCHMARK(RegressionBuildInt64DictionaryArraySimilar); -BENCHMARK(RegressionBuildStringDictionaryArray); +BENCHMARK(BuildInt64DictionaryArrayRandom); +BENCHMARK(BuildInt64DictionaryArraySequential); +BENCHMARK(BuildInt64DictionaryArraySimilar); +BENCHMARK(BuildStringDictionaryArray); -BENCHMARK(BM_ArrayDataConstructDestruct); +BENCHMARK(ArrayDataConstructDestruct); } // namespace arrow diff --git a/cpp/src/arrow/compute/compute-benchmark.cc b/cpp/src/arrow/compute/compute-benchmark.cc index c14f706c445..7650b475748 100644 --- a/cpp/src/arrow/compute/compute-benchmark.cc +++ b/cpp/src/arrow/compute/compute-benchmark.cc @@ -198,19 +198,14 @@ static void BM_UniqueString100bytes(benchmark::State& state) { BenchUnique(state, 
HashParams{0.05, 100}, state.range(0), state.range(1)); } -BENCHMARK(BM_BuildDictionary)->MinTime(1.0)->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_BuildStringDictionary)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(BM_BuildDictionary); +BENCHMARK(BM_BuildStringDictionary); -constexpr int kHashBenchmarkLength = 1 << 24; +constexpr int kHashBenchmarkLength = 1 << 22; #define ADD_HASH_ARGS(WHAT) \ - WHAT->Args({kHashBenchmarkLength, 50}) \ - ->Args({kHashBenchmarkLength, 1 << 10}) \ + WHAT->Args({kHashBenchmarkLength, 1 << 10}) \ ->Args({kHashBenchmarkLength, 10 * 1 << 10}) \ - ->Args({kHashBenchmarkLength, 1 << 20}) \ - ->MinTime(1.0) \ - ->Unit(benchmark::kMicrosecond) \ - ->UseRealTime() ADD_HASH_ARGS(BENCHMARK(BM_UniqueInt64NoNulls)); ADD_HASH_ARGS(BENCHMARK(BM_UniqueInt64WithNulls)); @@ -219,15 +214,11 @@ ADD_HASH_ARGS(BENCHMARK(BM_UniqueString100bytes)); BENCHMARK(BM_UniqueUInt8NoNulls) ->Args({kHashBenchmarkLength, 200}) - ->MinTime(1.0) - ->Unit(benchmark::kMicrosecond) - ->UseRealTime(); + ->Unit(benchmark::kMicrosecond); BENCHMARK(BM_UniqueUInt8WithNulls) ->Args({kHashBenchmarkLength, 200}) - ->MinTime(1.0) - ->Unit(benchmark::kMicrosecond) - ->UseRealTime(); + ->Unit(benchmark::kMicrosecond); } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/aggregate-benchmark.cc b/cpp/src/arrow/compute/kernels/aggregate-benchmark.cc index 085843e700d..cc2d3aa58dd 100644 --- a/cpp/src/arrow/compute/kernels/aggregate-benchmark.cc +++ b/cpp/src/arrow/compute/kernels/aggregate-benchmark.cc @@ -37,6 +37,8 @@ namespace compute { #include #include +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE + namespace BitUtil = arrow::BitUtil; using arrow::internal::BitmapReader; @@ -273,7 +275,7 @@ struct SumBitmapVectorizeUnroll : public Summer { }; template -void BenchSum(benchmark::State& state) { +void ReferenceSum(benchmark::State& state) { using T = typename Functor::ValueType; const int64_t array_size = state.range(0) / sizeof(int64_t); @@ 
-295,15 +297,17 @@ void BenchSum(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * array_size * sizeof(T)); } -BENCHMARK_TEMPLATE(BenchSum, SumNoNulls)->Apply(BenchmarkSetArgs); -BENCHMARK_TEMPLATE(BenchSum, SumNoNullsUnrolled)->Apply(BenchmarkSetArgs); -BENCHMARK_TEMPLATE(BenchSum, SumSentinel)->Apply(BenchmarkSetArgs); -BENCHMARK_TEMPLATE(BenchSum, SumSentinelUnrolled)->Apply(BenchmarkSetArgs); -BENCHMARK_TEMPLATE(BenchSum, SumBitmapNaive)->Apply(BenchmarkSetArgs); -BENCHMARK_TEMPLATE(BenchSum, SumBitmapReader)->Apply(BenchmarkSetArgs); -BENCHMARK_TEMPLATE(BenchSum, SumBitmapVectorizeUnroll)->Apply(BenchmarkSetArgs); - -static void RegressionSumKernel(benchmark::State& state) { +BENCHMARK_TEMPLATE(ReferenceSum, SumNoNulls)->Apply(BenchmarkSetArgs); +BENCHMARK_TEMPLATE(ReferenceSum, SumNoNullsUnrolled)->Apply(BenchmarkSetArgs); +BENCHMARK_TEMPLATE(ReferenceSum, SumSentinel)->Apply(BenchmarkSetArgs); +BENCHMARK_TEMPLATE(ReferenceSum, SumSentinelUnrolled)->Apply(BenchmarkSetArgs); +BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapNaive)->Apply(BenchmarkSetArgs); +BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapReader)->Apply(BenchmarkSetArgs); +BENCHMARK_TEMPLATE(ReferenceSum, SumBitmapVectorizeUnroll) + ->Apply(BenchmarkSetArgs); +#endif // ARROW_WITH_BENCHMARKS_REFERENCE + +static void SumKernel(benchmark::State& state) { const int64_t array_size = state.range(0) / sizeof(int64_t); const double null_percent = static_cast(state.range(1)) / 100.0; auto rand = random::RandomArrayGenerator(1923); @@ -322,7 +326,7 @@ static void RegressionSumKernel(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * array_size * sizeof(int64_t)); } -BENCHMARK(RegressionSumKernel)->Apply(RegressionSetArgs); +BENCHMARK(SumKernel)->Apply(RegressionSetArgs); } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/filter-benchmark.cc b/cpp/src/arrow/compute/kernels/filter-benchmark.cc index 1bb9255e1ed..24e18415ac8 100644 --- 
a/cpp/src/arrow/compute/kernels/filter-benchmark.cc +++ b/cpp/src/arrow/compute/kernels/filter-benchmark.cc @@ -29,7 +29,7 @@ namespace arrow { namespace compute { -static void RegressionCompareArrayScalarKernel(benchmark::State& state) { +static void CompareArrayScalarKernel(benchmark::State& state) { const int64_t memory_size = state.range(0) / 4; const int64_t array_size = memory_size / sizeof(int64_t); const double null_percent = static_cast(state.range(1)) / 100.0; @@ -51,7 +51,7 @@ static void RegressionCompareArrayScalarKernel(benchmark::State& state) { state.SetBytesProcessed(state.iterations() * array_size * sizeof(int64_t)); } -BENCHMARK(RegressionCompareArrayScalarKernel)->Apply(RegressionSetArgs); +BENCHMARK(CompareArrayScalarKernel)->Apply(RegressionSetArgs); } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/csv/converter-benchmark.cc b/cpp/src/arrow/csv/converter-benchmark.cc index f58f47d0456..1db32140764 100644 --- a/cpp/src/arrow/csv/converter-benchmark.cc +++ b/cpp/src/arrow/csv/converter-benchmark.cc @@ -76,7 +76,7 @@ static void BenchmarkConversion(benchmark::State& state, // NOLINT non-const re constexpr size_t num_rows = 10000; -static void RegressionInt64Conversion( +static void Int64Conversion( benchmark::State& state) { // NOLINT non-const reference auto parser = BuildInt64Data(num_rows); auto options = ConvertOptions::Defaults(); @@ -84,7 +84,7 @@ static void RegressionInt64Conversion( BenchmarkConversion(state, *parser, int64(), options); } -static void RegressionFloatConversion( +static void FloatConversion( benchmark::State& state) { // NOLINT non-const reference auto parser = BuildFloatData(num_rows); auto options = ConvertOptions::Defaults(); @@ -92,8 +92,8 @@ static void RegressionFloatConversion( BenchmarkConversion(state, *parser, float64(), options); } -BENCHMARK(RegressionInt64Conversion); -BENCHMARK(RegressionFloatConversion); +BENCHMARK(Int64Conversion); +BENCHMARK(FloatConversion); } // namespace csv } // 
namespace arrow diff --git a/cpp/src/arrow/csv/parser-benchmark.cc b/cpp/src/arrow/csv/parser-benchmark.cc index 84b955eb904..31250f0fb8b 100644 --- a/cpp/src/arrow/csv/parser-benchmark.cc +++ b/cpp/src/arrow/csv/parser-benchmark.cc @@ -56,7 +56,7 @@ static void BenchmarkCSVChunking(benchmark::State& state, // NOLINT non-const r state.SetBytesProcessed(state.iterations() * csv_size); } -static void RegressionChunkCSVQuotedBlock( +static void ChunkCSVQuotedBlock( benchmark::State& state) { // NOLINT non-const reference auto csv = BuildCsvData(one_row, num_rows); auto options = ParseOptions::Defaults(); @@ -67,7 +67,7 @@ static void RegressionChunkCSVQuotedBlock( BenchmarkCSVChunking(state, csv, options); } -static void RegressionChunkCSVEscapedBlock( +static void ChunkCSVEscapedBlock( benchmark::State& state) { // NOLINT non-const reference auto csv = BuildCsvData(one_row_escaped, num_rows); auto options = ParseOptions::Defaults(); @@ -78,7 +78,7 @@ static void RegressionChunkCSVEscapedBlock( BenchmarkCSVChunking(state, csv, options); } -static void RegressionChunkCSVNoNewlinesBlock( +static void ChunkCSVNoNewlinesBlock( benchmark::State& state) { // NOLINT non-const reference auto csv = BuildCsvData(one_row_escaped, num_rows); auto options = ParseOptions::Defaults(); @@ -121,7 +121,7 @@ static void BenchmarkCSVParsing(benchmark::State& state, // NOLINT non-const re state.SetBytesProcessed(state.iterations() * csv_size); } -static void RegressionParseCSVQuotedBlock( +static void ParseCSVQuotedBlock( benchmark::State& state) { // NOLINT non-const reference auto csv = BuildCsvData(one_row, num_rows); auto options = ParseOptions::Defaults(); @@ -131,7 +131,7 @@ static void RegressionParseCSVQuotedBlock( BenchmarkCSVParsing(state, csv, num_rows, options); } -static void RegressionParseCSVEscapedBlock( +static void ParseCSVEscapedBlock( benchmark::State& state) { // NOLINT non-const reference auto csv = BuildCsvData(one_row_escaped, num_rows); auto options = 
ParseOptions::Defaults(); @@ -141,11 +141,11 @@ static void RegressionParseCSVEscapedBlock( BenchmarkCSVParsing(state, csv, num_rows, options); } -BENCHMARK(RegressionChunkCSVQuotedBlock); -BENCHMARK(RegressionChunkCSVEscapedBlock); -BENCHMARK(RegressionChunkCSVNoNewlinesBlock); -BENCHMARK(RegressionParseCSVQuotedBlock); -BENCHMARK(RegressionParseCSVEscapedBlock); +BENCHMARK(ChunkCSVQuotedBlock); +BENCHMARK(ChunkCSVEscapedBlock); +BENCHMARK(ChunkCSVNoNewlinesBlock); +BENCHMARK(ParseCSVQuotedBlock); +BENCHMARK(ParseCSVEscapedBlock); } // namespace csv } // namespace arrow diff --git a/cpp/src/arrow/gpu/cuda-benchmark.cc b/cpp/src/arrow/gpu/cuda-benchmark.cc index a61eb921e91..2b7fe40fbe3 100644 --- a/cpp/src/arrow/gpu/cuda-benchmark.cc +++ b/cpp/src/arrow/gpu/cuda-benchmark.cc @@ -85,14 +85,12 @@ static void BM_Writer_Unbuffered(benchmark::State& state) { BENCHMARK(BM_Writer_Buffered) ->RangeMultiplier(16) ->Range(1 << 8, 1 << 16) - ->MinTime(1.0) ->UseRealTime(); BENCHMARK(BM_Writer_Unbuffered) ->RangeMultiplier(4) ->RangeMultiplier(16) ->Range(1 << 8, 1 << 16) - ->MinTime(1.0) ->UseRealTime(); } // namespace cuda diff --git a/cpp/src/arrow/io/file-benchmark.cc b/cpp/src/arrow/io/file-benchmark.cc index b4344238a1f..4557ff78ecd 100644 --- a/cpp/src/arrow/io/file-benchmark.cc +++ b/cpp/src/arrow/io/file-benchmark.cc @@ -224,31 +224,13 @@ static void BM_BufferedOutputStreamLargeWritesToPipe( // We use real time as we don't want to count CPU time spent in the // BackgroundReader thread -BENCHMARK(BM_FileOutputStreamSmallWritesToNull) - ->Repetitions(2) - ->MinTime(1.0) - ->UseRealTime(); -BENCHMARK(BM_FileOutputStreamSmallWritesToPipe) - ->Repetitions(2) - ->MinTime(1.0) - ->UseRealTime(); -BENCHMARK(BM_FileOutputStreamLargeWritesToPipe) - ->Repetitions(2) - ->MinTime(1.0) - ->UseRealTime(); - -BENCHMARK(BM_BufferedOutputStreamSmallWritesToNull) - ->Repetitions(2) - ->MinTime(1.0) - ->UseRealTime(); -BENCHMARK(BM_BufferedOutputStreamSmallWritesToPipe) - 
->Repetitions(2) - ->MinTime(1.0) - ->UseRealTime(); -BENCHMARK(BM_BufferedOutputStreamLargeWritesToPipe) - ->Repetitions(2) - ->MinTime(1.0) - ->UseRealTime(); +BENCHMARK(BM_FileOutputStreamSmallWritesToNull)->UseRealTime(); +BENCHMARK(BM_FileOutputStreamSmallWritesToPipe)->UseRealTime(); +BENCHMARK(BM_FileOutputStreamLargeWritesToPipe)->UseRealTime(); + +BENCHMARK(BM_BufferedOutputStreamSmallWritesToNull)->UseRealTime(); +BENCHMARK(BM_BufferedOutputStreamSmallWritesToPipe)->UseRealTime(); +BENCHMARK(BM_BufferedOutputStreamLargeWritesToPipe)->UseRealTime(); #endif // ifndef _WIN32 diff --git a/cpp/src/arrow/io/memory-benchmark.cc b/cpp/src/arrow/io/memory-benchmark.cc index 78389574b63..a3676e41f2f 100644 --- a/cpp/src/arrow/io/memory-benchmark.cc +++ b/cpp/src/arrow/io/memory-benchmark.cc @@ -44,6 +44,7 @@ static const int64_t kL3Size = cpu_info->CacheSize(CpuInfo::L3_CACHE); constexpr size_t kMemoryPerCore = 32 * 1024 * 1024; using BufferPtr = std::shared_ptr; +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE #ifndef _MSC_VER #ifdef ARROW_AVX512 @@ -200,6 +201,7 @@ BENCHMARK_TEMPLATE(MemoryBandwidth, StreamReadWrite)->Apply(SetMemoryBandwidthAr BENCHMARK_TEMPLATE(MemoryBandwidth, PlatformMemcpy)->Apply(SetMemoryBandwidthArgs); #endif // _MSC_VER +#endif // ARROW_WITH_BENCHMARKS_REFERENCE static void ParallelMemoryCopy(benchmark::State& state) { // NOLINT non-const reference const int64_t n_threads = state.range(0); diff --git a/cpp/src/arrow/ipc/read-write-benchmark.cc b/cpp/src/arrow/ipc/read-write-benchmark.cc index 66d45fb0127..07ad2e40108 100644 --- a/cpp/src/arrow/ipc/read-write-benchmark.cc +++ b/cpp/src/arrow/ipc/read-write-benchmark.cc @@ -99,16 +99,7 @@ static void BM_ReadRecordBatch(benchmark::State& state) { // NOLINT non-const r state.SetBytesProcessed(int64_t(state.iterations()) * kTotalSize); } -BENCHMARK(BM_WriteRecordBatch) - ->RangeMultiplier(4) - ->Range(1, 1 << 13) - ->MinTime(1.0) - ->UseRealTime(); - -BENCHMARK(BM_ReadRecordBatch) - 
->RangeMultiplier(4) - ->Range(1, 1 << 13) - ->MinTime(1.0) - ->UseRealTime(); +BENCHMARK(BM_WriteRecordBatch)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime(); +BENCHMARK(BM_ReadRecordBatch)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime(); } // namespace arrow diff --git a/cpp/src/arrow/json/parser-benchmark.cc b/cpp/src/arrow/json/parser-benchmark.cc index 5025c29c49e..40c357d819e 100644 --- a/cpp/src/arrow/json/parser-benchmark.cc +++ b/cpp/src/arrow/json/parser-benchmark.cc @@ -63,7 +63,7 @@ static void BenchmarkJSONChunking(benchmark::State& state, state.SetBytesProcessed(state.iterations() * json->size()); } -static void RegressionChunkJSONPrettyPrinted( +static void ChunkJSONPrettyPrinted( benchmark::State& state) { // NOLINT non-const reference const int32_t num_rows = 5000; @@ -75,7 +75,7 @@ static void RegressionChunkJSONPrettyPrinted( BenchmarkJSONChunking(state, std::make_shared(json), options); } -static void RegressionChunkJSONLineDelimited( +static void ChunkJSONLineDelimited( benchmark::State& state) { // NOLINT non-const reference const int32_t num_rows = 5000; @@ -102,7 +102,7 @@ static void BenchmarkJSONParsing(benchmark::State& state, // NOLINT non-const r state.SetBytesProcessed(state.iterations() * json->size()); } -static void RegressionParseJSONBlockWithSchema( +static void ParseJSONBlockWithSchema( benchmark::State& state) { // NOLINT non-const reference const int32_t num_rows = 5000; auto options = ParseOptions::Defaults(); @@ -145,7 +145,7 @@ static void BenchmarkReadJSONBlockWithSchema( BenchmarkJSONReading(state, json, num_rows, read_options, parse_options); } -static void RegressionReadJSONBlockWithSchemaSingleThread( +static void ReadJSONBlockWithSchemaSingleThread( benchmark::State& state) { // NOLINT non-const reference BenchmarkReadJSONBlockWithSchema(state, false); } @@ -155,11 +155,11 @@ static void ReferenceReadJSONBlockWithSchemaMultiThread( BenchmarkReadJSONBlockWithSchema(state, true); } 
-BENCHMARK(RegressionChunkJSONPrettyPrinted); -BENCHMARK(RegressionChunkJSONLineDelimited); -BENCHMARK(RegressionParseJSONBlockWithSchema); +BENCHMARK(ChunkJSONPrettyPrinted); +BENCHMARK(ChunkJSONLineDelimited); +BENCHMARK(ParseJSONBlockWithSchema); -BENCHMARK(RegressionReadJSONBlockWithSchemaSingleThread); +BENCHMARK(ReadJSONBlockWithSchemaSingleThread); BENCHMARK(ReferenceReadJSONBlockWithSchemaMultiThread)->UseRealTime(); } // namespace json diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc b/cpp/src/arrow/util/bit-util-benchmark.cc index f34b9bbdeca..488a56a5ade 100644 --- a/cpp/src/arrow/util/bit-util-benchmark.cc +++ b/cpp/src/arrow/util/bit-util-benchmark.cc @@ -28,8 +28,6 @@ namespace arrow { -using internal::CopyBitmap; - namespace BitUtil { // A naive bitmap reader implementation, meant as a baseline against @@ -180,7 +178,7 @@ static void ReferenceNaiveBitmapReader(benchmark::State& state) { BenchmarkBitmapReader(state, state.range(0)); } -static void RegressionBitmapReader(benchmark::State& state) { +static void BitmapReader(benchmark::State& state) { BenchmarkBitmapReader(state, state.range(0)); } @@ -188,11 +186,11 @@ static void ReferenceNaiveBitmapWriter(benchmark::State& state) { BenchmarkBitmapWriter(state, state.range(0)); } -static void RegressionBitmapWriter(benchmark::State& state) { +static void BitmapWriter(benchmark::State& state) { BenchmarkBitmapWriter(state, state.range(0)); } -static void RegressionFirstTimeBitmapWriter(benchmark::State& state) { +static void FirstTimeBitmapWriter(benchmark::State& state) { BenchmarkBitmapWriter(state, state.range(0)); } @@ -210,18 +208,18 @@ struct GenerateBitsUnrolledFunctor { } }; -static void RegressionGenerateBits(benchmark::State& state) { +static void GenerateBits(benchmark::State& state) { BenchmarkGenerateBits(state, state.range(0)); } -static void RegressionGenerateBitsUnrolled(benchmark::State& state) { +static void GenerateBitsUnrolled(benchmark::State& state) { 
BenchmarkGenerateBits(state, state.range(0)); } constexpr int64_t kBufferSize = 1024 * 8; template -static void RegressionCopyBitmap(benchmark::State& state) { // NOLINT non-const reference +static void CopyBitmap(benchmark::State& state) { // NOLINT non-const reference const int64_t buffer_size = state.range(0); const int64_t bits_size = buffer_size * 8; std::shared_ptr buffer = CreateRandomBuffer(buffer_size); @@ -235,34 +233,33 @@ static void RegressionCopyBitmap(benchmark::State& state) { // NOLINT non-const ABORT_NOT_OK(AllocateEmptyBitmap(pool, length, ©)); for (auto _ : state) { - CopyBitmap(src, offset, length, copy->mutable_data(), 0, false); + internal::CopyBitmap(src, offset, length, copy->mutable_data(), 0, false); } state.SetBytesProcessed(state.iterations() * buffer_size); } -static void RegressionCopyBitmapWithoutOffset( +static void CopyBitmapWithoutOffset( benchmark::State& state) { // NOLINT non-const reference - RegressionCopyBitmap<0>(state); + CopyBitmap<0>(state); } // Trigger the slow path where the buffer is not byte aligned. 
-static void RegressionCopyBitmapWithOffset( - benchmark::State& state) { // NOLINT non-const reference - RegressionCopyBitmap<4>(state); +static void CopyBitmapWithOffset(benchmark::State& state) { // NOLINT non-const reference + CopyBitmap<4>(state); } -BENCHMARK(RegressionCopyBitmapWithoutOffset)->Arg(kBufferSize); -BENCHMARK(RegressionCopyBitmapWithOffset)->Arg(kBufferSize); +BENCHMARK(CopyBitmapWithoutOffset)->Arg(kBufferSize); +BENCHMARK(CopyBitmapWithOffset)->Arg(kBufferSize); BENCHMARK(ReferenceNaiveBitmapReader)->Arg(kBufferSize); -BENCHMARK(RegressionBitmapReader)->Arg(kBufferSize); +BENCHMARK(BitmapReader)->Arg(kBufferSize); BENCHMARK(ReferenceNaiveBitmapWriter)->Arg(kBufferSize); -BENCHMARK(RegressionBitmapWriter)->Arg(kBufferSize); +BENCHMARK(BitmapWriter)->Arg(kBufferSize); -BENCHMARK(RegressionFirstTimeBitmapWriter)->Arg(kBufferSize); -BENCHMARK(RegressionGenerateBits)->Arg(kBufferSize); -BENCHMARK(RegressionGenerateBitsUnrolled)->Arg(kBufferSize); +BENCHMARK(FirstTimeBitmapWriter)->Arg(kBufferSize); +BENCHMARK(GenerateBits)->Arg(kBufferSize); +BENCHMARK(GenerateBitsUnrolled)->Arg(kBufferSize); } // namespace BitUtil } // namespace arrow diff --git a/cpp/src/arrow/util/compression-benchmark.cc b/cpp/src/arrow/util/compression-benchmark.cc index e71d80ada49..2aefe19b165 100644 --- a/cpp/src/arrow/util/compression-benchmark.cc +++ b/cpp/src/arrow/util/compression-benchmark.cc @@ -180,31 +180,15 @@ static void BM_StreamingDecompression( BM_StreamingDecompression(COMPRESSION, data, state); } -BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::GZIP) - ->Unit(benchmark::kMillisecond) - ->Repetitions(1); -BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::BROTLI) - ->Unit(benchmark::kMillisecond) - ->Repetitions(1); -BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::ZSTD) - ->Unit(benchmark::kMillisecond) - ->Repetitions(1); -BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::LZ4) - ->Unit(benchmark::kMillisecond) - 
->Repetitions(1); - -BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::GZIP) - ->Unit(benchmark::kMillisecond) - ->Repetitions(1); -BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::BROTLI) - ->Unit(benchmark::kMillisecond) - ->Repetitions(1); -BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::ZSTD) - ->Unit(benchmark::kMillisecond) - ->Repetitions(1); -BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::LZ4) - ->Unit(benchmark::kMillisecond) - ->Repetitions(1); +BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::GZIP); +BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::BROTLI); +BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::ZSTD); +BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::LZ4); + +BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::GZIP); +BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::BROTLI); +BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::ZSTD); +BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::LZ4); } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/util/decimal-benchmark.cc b/cpp/src/arrow/util/decimal-benchmark.cc index 3129536cf0a..b2e01e180f7 100644 --- a/cpp/src/arrow/util/decimal-benchmark.cc +++ b/cpp/src/arrow/util/decimal-benchmark.cc @@ -39,7 +39,7 @@ static void BM_FromString(benchmark::State& state) { // NOLINT non-const refere state.SetItemsProcessed(state.iterations() * values.size()); } -BENCHMARK(BM_FromString)->Repetitions(3)->Unit(benchmark::kMicrosecond); +BENCHMARK(BM_FromString); } // namespace Decimal } // namespace arrow diff --git a/cpp/src/arrow/util/hashing-benchmark.cc b/cpp/src/arrow/util/hashing-benchmark.cc index 2049c4e64e1..3a0eb3c35c1 100644 --- a/cpp/src/arrow/util/hashing-benchmark.cc +++ b/cpp/src/arrow/util/hashing-benchmark.cc @@ -114,15 +114,10 @@ static void BM_HashLargeStrings(benchmark::State& state) { // NOLINT non-const // 
---------------------------------------------------------------------- // Benchmark declarations -static constexpr int32_t kRepetitions = 1; - -BENCHMARK(BM_HashIntegers)->Repetitions(kRepetitions)->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_HashSmallStrings)->Repetitions(kRepetitions)->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_HashMediumStrings)->Repetitions(kRepetitions)->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_HashLargeStrings)->Repetitions(kRepetitions)->Unit(benchmark::kMicrosecond); +BENCHMARK(BM_HashIntegers); +BENCHMARK(BM_HashSmallStrings); +BENCHMARK(BM_HashMediumStrings); +BENCHMARK(BM_HashLargeStrings); } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/int-util-benchmark.cc b/cpp/src/arrow/util/int-util-benchmark.cc index 3feb2eeafeb..37111da6338 100644 --- a/cpp/src/arrow/util/int-util-benchmark.cc +++ b/cpp/src/arrow/util/int-util-benchmark.cc @@ -97,13 +97,10 @@ static void BM_DetectIntWidthNulls( state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t)); } -BENCHMARK(BM_DetectUIntWidthNoNulls)->MinTime(1.0)->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_DetectUIntWidthNulls)->MinTime(1.0)->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_DetectIntWidthNoNulls)->MinTime(1.0)->Unit(benchmark::kMicrosecond); - -BENCHMARK(BM_DetectIntWidthNulls)->MinTime(1.0)->Unit(benchmark::kMicrosecond); +BENCHMARK(BM_DetectUIntWidthNoNulls); +BENCHMARK(BM_DetectUIntWidthNulls); +BENCHMARK(BM_DetectIntWidthNoNulls); +BENCHMARK(BM_DetectIntWidthNulls); } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/lazy-benchmark.cc b/cpp/src/arrow/util/lazy-benchmark.cc index 02c7de5c22d..d00571b3040 100644 --- a/cpp/src/arrow/util/lazy-benchmark.cc +++ b/cpp/src/arrow/util/lazy-benchmark.cc @@ -27,6 +27,8 @@ namespace arrow { +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE + static constexpr int64_t kSize = 100000000; template @@ -46,7 +48,7 @@ void BM_for_loop(benchmark::State& state) { } } 
-BENCHMARK(BM_for_loop)->Repetitions(3)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_for_loop); // For comparison: pure copy without any changes void BM_std_copy(benchmark::State& state) { @@ -58,7 +60,7 @@ void BM_std_copy(benchmark::State& state) { } } -BENCHMARK(BM_std_copy)->Repetitions(3)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_std_copy); // For comparison: pure copy with type convesion. void BM_std_copy_converting(benchmark::State& state) { @@ -71,7 +73,7 @@ void BM_std_copy_converting(benchmark::State& state) { } } -BENCHMARK(BM_std_copy_converting)->Repetitions(3)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_std_copy_converting); // std::copy with a lazy range as a source void BM_lazy_copy(benchmark::State& state) { @@ -85,7 +87,7 @@ void BM_lazy_copy(benchmark::State& state) { } } -BENCHMARK(BM_lazy_copy)->Repetitions(3)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_lazy_copy); // std::copy with a lazy range which does static cast. // Should be the same performance as std::copy with differtly typed iterators @@ -101,7 +103,7 @@ void BM_lazy_copy_converting(benchmark::State& state) { } } -BENCHMARK(BM_lazy_copy_converting)->Repetitions(3)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_lazy_copy_converting); // For loop with a post-increment of a lazy operator void BM_lazy_postinc(benchmark::State& state) { @@ -119,6 +121,8 @@ void BM_lazy_postinc(benchmark::State& state) { } } -BENCHMARK(BM_lazy_postinc)->Repetitions(3)->Unit(benchmark::kMillisecond); +BENCHMARK(BM_lazy_postinc); + +#endif // ARROW_WITH_BENCHMARKS_REFERENCE } // namespace arrow diff --git a/cpp/src/arrow/util/machine-benchmark.cc b/cpp/src/arrow/util/machine-benchmark.cc index ad3f413e7f0..e76fae2df50 100644 --- a/cpp/src/arrow/util/machine-benchmark.cc +++ b/cpp/src/arrow/util/machine-benchmark.cc @@ -28,6 +28,8 @@ namespace arrow { +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE + // Generate a vector of indices such as following the indices describes // a path over the whole vector. 
The path is randomized to avoid triggering // automatic prefetching in the CPU. @@ -65,6 +67,8 @@ static void BM_memory_latency(benchmark::State& state) { state.SetItemsProcessed(state.iterations()); } -BENCHMARK(BM_memory_latency)->RangeMultiplier(2)->Range(2 << 10, 2 << 24); +BENCHMARK(BM_memory_latency)->Repetitions(1)->RangeMultiplier(2)->Range(2 << 10, 2 << 24); + +#endif // ARROW_WITH_BENCHMARKS_REFERENCE } // namespace arrow diff --git a/cpp/src/arrow/util/thread-pool-benchmark.cc b/cpp/src/arrow/util/thread-pool-benchmark.cc index f0f23622deb..bcc88b83fda 100644 --- a/cpp/src/arrow/util/thread-pool-benchmark.cc +++ b/cpp/src/arrow/util/thread-pool-benchmark.cc @@ -168,6 +168,8 @@ static void WorkloadCost_Customize(benchmark::internal::Benchmark* b) { b->Args({w}); } b->ArgNames({"task_cost"}); + b->UseRealTime(); + b->Repetitions(1); } static void ThreadPoolSpawn_Customize(benchmark::internal::Benchmark* b) { @@ -177,26 +179,14 @@ static void ThreadPoolSpawn_Customize(benchmark::internal::Benchmark* b) { } } b->ArgNames({"threads", "task_cost"}); + b->UseRealTime(); + b->Repetitions(1); } -static const int kRepetitions = 1; - -BENCHMARK(BM_WorkloadCost)->Repetitions(kRepetitions)->Apply(WorkloadCost_Customize); - -BENCHMARK(BM_ThreadPoolSpawn) - ->UseRealTime() - ->Repetitions(kRepetitions) - ->Apply(ThreadPoolSpawn_Customize); - -BENCHMARK(BM_SerialTaskGroup) - ->UseRealTime() - ->Repetitions(kRepetitions) - ->Apply(WorkloadCost_Customize); - -BENCHMARK(BM_ThreadedTaskGroup) - ->UseRealTime() - ->Repetitions(kRepetitions) - ->Apply(ThreadPoolSpawn_Customize); +BENCHMARK(BM_WorkloadCost)->Apply(WorkloadCost_Customize); +BENCHMARK(BM_SerialTaskGroup)->Apply(WorkloadCost_Customize); +BENCHMARK(BM_ThreadPoolSpawn)->Apply(ThreadPoolSpawn_Customize); +BENCHMARK(BM_ThreadedTaskGroup)->Apply(ThreadPoolSpawn_Customize); } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/trie-benchmark.cc b/cpp/src/arrow/util/trie-benchmark.cc index 
8aab8b8c62c..7f21b8b958c 100644 --- a/cpp/src/arrow/util/trie-benchmark.cc +++ b/cpp/src/arrow/util/trie-benchmark.cc @@ -210,12 +210,10 @@ static void BM_InlinedTrieLookupNotFound( BenchmarkInlinedTrieLookups(state, {"None", "1.0", "", "abc"}); } -static const int kRepetitions = 2; - -BENCHMARK(BM_TrieLookupFound)->Repetitions(kRepetitions); -BENCHMARK(BM_TrieLookupNotFound)->Repetitions(kRepetitions); -BENCHMARK(BM_InlinedTrieLookupFound)->Repetitions(kRepetitions); -BENCHMARK(BM_InlinedTrieLookupNotFound)->Repetitions(kRepetitions); +BENCHMARK(BM_TrieLookupFound); +BENCHMARK(BM_TrieLookupNotFound); +BENCHMARK(BM_InlinedTrieLookupFound); +BENCHMARK(BM_InlinedTrieLookupNotFound); } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/utf8-util-benchmark.cc b/cpp/src/arrow/util/utf8-util-benchmark.cc index 7e03a4e5324..dd6eec6de4a 100644 --- a/cpp/src/arrow/util/utf8-util-benchmark.cc +++ b/cpp/src/arrow/util/utf8-util-benchmark.cc @@ -81,8 +81,7 @@ static void BM_ValidateTinyNonAscii( static void BM_ValidateSmallAscii( benchmark::State& state) { // NOLINT non-const reference - BenchmarkUTF8Validation(state, valid_ascii, true); -} + BenchmarkUTF8Validation(state, valid_ascii, true); } static void BM_ValidateSmallAlmostAscii( benchmark::State& state) { // NOLINT non-const reference @@ -112,16 +111,14 @@ static void BM_ValidateLargeNonAscii( BenchmarkUTF8Validation(state, s, true); } -static const int kRepetitions = 1; - -BENCHMARK(BM_ValidateTinyAscii)->Repetitions(kRepetitions); -BENCHMARK(BM_ValidateTinyNonAscii)->Repetitions(kRepetitions); -BENCHMARK(BM_ValidateSmallAscii)->Repetitions(kRepetitions); -BENCHMARK(BM_ValidateSmallAlmostAscii)->Repetitions(kRepetitions); -BENCHMARK(BM_ValidateSmallNonAscii)->Repetitions(kRepetitions); -BENCHMARK(BM_ValidateLargeAscii)->Repetitions(kRepetitions); -BENCHMARK(BM_ValidateLargeAlmostAscii)->Repetitions(kRepetitions); -BENCHMARK(BM_ValidateLargeNonAscii)->Repetitions(kRepetitions); 
+BENCHMARK(BM_ValidateTinyAscii); +BENCHMARK(BM_ValidateTinyNonAscii); +BENCHMARK(BM_ValidateSmallAscii); +BENCHMARK(BM_ValidateSmallAlmostAscii); +BENCHMARK(BM_ValidateSmallNonAscii); +BENCHMARK(BM_ValidateLargeAscii); +BENCHMARK(BM_ValidateLargeAlmostAscii); +BENCHMARK(BM_ValidateLargeNonAscii); } // namespace util } // namespace arrow diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index e9221fe14f7..1d67624c3c5 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -214,7 +214,7 @@ def benchmark_list(ctx, src, preserve, output, cmake_extras, baseline): @click.option("--suite-filter", metavar="", show_default=True, type=str, default=None, help="Regex filtering benchmark suites.") @click.option("--benchmark-filter", metavar="", show_default=True, - type=str, default=DEFAULT_BENCHMARK_FILTER, + type=str, default=None, help="Regex filtering benchmark suites.") @click.option("--preserve", type=bool, default=False, show_default=True, is_flag=True, help="Preserve workspace for investigation.") From 8334c68f7fe10466dbdb62e4d0529fda93f91666 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Wed, 22 May 2019 13:52:16 -0400 Subject: [PATCH 10/14] Remove BM_ benchmark prefix --- cpp/src/arrow/column-benchmark.cc | 4 +- cpp/src/arrow/compute/compute-benchmark.cc | 32 ++++++++-------- cpp/src/arrow/gpu/cuda-benchmark.cc | 8 ++-- cpp/src/arrow/io/file-benchmark.cc | 24 ++++++------ cpp/src/arrow/ipc/read-write-benchmark.cc | 8 ++-- cpp/src/arrow/util/compression-benchmark.cc | 28 +++++++------- cpp/src/arrow/util/decimal-benchmark.cc | 4 +- cpp/src/arrow/util/hashing-benchmark.cc | 16 ++++---- cpp/src/arrow/util/int-util-benchmark.cc | 16 ++++---- cpp/src/arrow/util/lazy-benchmark.cc | 24 ++++++------ cpp/src/arrow/util/machine-benchmark.cc | 4 +- .../arrow/util/number-parsing-benchmark.cc | 38 +++++++++---------- cpp/src/arrow/util/thread-pool-benchmark.cc | 18 ++++----- 
cpp/src/arrow/util/trie-benchmark.cc | 16 ++++---- cpp/src/arrow/util/utf8-util-benchmark.cc | 32 ++++++++-------- 15 files changed, 136 insertions(+), 136 deletions(-) diff --git a/cpp/src/arrow/column-benchmark.cc b/cpp/src/arrow/column-benchmark.cc index 3ae83f6a210..bb2c63179ab 100644 --- a/cpp/src/arrow/column-benchmark.cc +++ b/cpp/src/arrow/column-benchmark.cc @@ -36,7 +36,7 @@ Status MakePrimitive(int64_t length, int64_t null_count, std::shared_ptr* } } // anonymous namespace -static void BM_BuildInt32ColumnByChunk( +static void BuildInt32ColumnByChunk( benchmark::State& state) { // NOLINT non-const reference ArrayVector arrays; for (int chunk_n = 0; chunk_n < state.range(0); ++chunk_n) { @@ -52,6 +52,6 @@ static void BM_BuildInt32ColumnByChunk( } } -BENCHMARK(BM_BuildInt32ColumnByChunk)->Range(5, 50000); +BENCHMARK(BuildInt32ColumnByChunk)->Range(5, 50000); } // namespace arrow diff --git a/cpp/src/arrow/compute/compute-benchmark.cc b/cpp/src/arrow/compute/compute-benchmark.cc index 7650b475748..a3af8c57029 100644 --- a/cpp/src/arrow/compute/compute-benchmark.cc +++ b/cpp/src/arrow/compute/compute-benchmark.cc @@ -31,7 +31,7 @@ namespace arrow { namespace compute { -static void BM_BuildDictionary(benchmark::State& state) { // NOLINT non-const reference +static void BuildDictionary(benchmark::State& state) { // NOLINT non-const reference const int64_t iterations = 1024; std::vector values; @@ -55,7 +55,7 @@ static void BM_BuildDictionary(benchmark::State& state) { // NOLINT non-const r state.SetBytesProcessed(state.iterations() * values.size() * sizeof(int64_t)); } -static void BM_BuildStringDictionary( +static void BuildStringDictionary( benchmark::State& state) { // NOLINT non-const reference const int64_t iterations = 1024 * 64; // Pre-render strings @@ -172,34 +172,34 @@ void BenchDictionaryEncode(benchmark::State& state, const ParamType& params, state.SetBytesProcessed(state.iterations() * params.GetBytesProcessed(length)); } -static void 
BM_UniqueUInt8NoNulls(benchmark::State& state) { +static void UniqueUInt8NoNulls(benchmark::State& state) { BenchUnique(state, HashParams{0}, state.range(0), state.range(1)); } -static void BM_UniqueUInt8WithNulls(benchmark::State& state) { +static void UniqueUInt8WithNulls(benchmark::State& state) { BenchUnique(state, HashParams{0.05}, state.range(0), state.range(1)); } -static void BM_UniqueInt64NoNulls(benchmark::State& state) { +static void UniqueInt64NoNulls(benchmark::State& state) { BenchUnique(state, HashParams{0}, state.range(0), state.range(1)); } -static void BM_UniqueInt64WithNulls(benchmark::State& state) { +static void UniqueInt64WithNulls(benchmark::State& state) { BenchUnique(state, HashParams{0.05}, state.range(0), state.range(1)); } -static void BM_UniqueString10bytes(benchmark::State& state) { +static void UniqueString10bytes(benchmark::State& state) { // Byte strings with 10 bytes each BenchUnique(state, HashParams{0.05, 10}, state.range(0), state.range(1)); } -static void BM_UniqueString100bytes(benchmark::State& state) { +static void UniqueString100bytes(benchmark::State& state) { // Byte strings with 100 bytes each BenchUnique(state, HashParams{0.05, 100}, state.range(0), state.range(1)); } -BENCHMARK(BM_BuildDictionary); -BENCHMARK(BM_BuildStringDictionary); +BENCHMARK(BuildDictionary); +BENCHMARK(BuildStringDictionary); constexpr int kHashBenchmarkLength = 1 << 22; @@ -207,16 +207,16 @@ constexpr int kHashBenchmarkLength = 1 << 22; WHAT->Args({kHashBenchmarkLength, 1 << 10}) \ ->Args({kHashBenchmarkLength, 10 * 1 << 10}) \ -ADD_HASH_ARGS(BENCHMARK(BM_UniqueInt64NoNulls)); -ADD_HASH_ARGS(BENCHMARK(BM_UniqueInt64WithNulls)); -ADD_HASH_ARGS(BENCHMARK(BM_UniqueString10bytes)); -ADD_HASH_ARGS(BENCHMARK(BM_UniqueString100bytes)); +ADD_HASH_ARGS(BENCHMARK(UniqueInt64NoNulls)); +ADD_HASH_ARGS(BENCHMARK(UniqueInt64WithNulls)); +ADD_HASH_ARGS(BENCHMARK(UniqueString10bytes)); +ADD_HASH_ARGS(BENCHMARK(UniqueString100bytes)); 
-BENCHMARK(BM_UniqueUInt8NoNulls) +BENCHMARK(UniqueUInt8NoNulls) ->Args({kHashBenchmarkLength, 200}) ->Unit(benchmark::kMicrosecond); -BENCHMARK(BM_UniqueUInt8WithNulls) +BENCHMARK(UniqueUInt8WithNulls) ->Args({kHashBenchmarkLength, 200}) ->Unit(benchmark::kMicrosecond); diff --git a/cpp/src/arrow/gpu/cuda-benchmark.cc b/cpp/src/arrow/gpu/cuda-benchmark.cc index 2b7fe40fbe3..96c3d0f6559 100644 --- a/cpp/src/arrow/gpu/cuda-benchmark.cc +++ b/cpp/src/arrow/gpu/cuda-benchmark.cc @@ -65,7 +65,7 @@ static void CudaBufferWriterBenchmark(benchmark::State& state, const int64_t tot state.SetBytesProcessed(int64_t(state.iterations()) * total_bytes); } -static void BM_Writer_Buffered(benchmark::State& state) { +static void Writer_Buffered(benchmark::State& state) { // 128MB const int64_t kTotalBytes = 1 << 27; @@ -75,19 +75,19 @@ static void BM_Writer_Buffered(benchmark::State& state) { CudaBufferWriterBenchmark(state, kTotalBytes, state.range(0), kBufferSize); } -static void BM_Writer_Unbuffered(benchmark::State& state) { +static void Writer_Unbuffered(benchmark::State& state) { // 128MB const int64_t kTotalBytes = 1 << 27; CudaBufferWriterBenchmark(state, kTotalBytes, state.range(0), 0); } // Vary chunk write size from 256 bytes to 64K -BENCHMARK(BM_Writer_Buffered) +BENCHMARK(Writer_Buffered) ->RangeMultiplier(16) ->Range(1 << 8, 1 << 16) ->UseRealTime(); -BENCHMARK(BM_Writer_Unbuffered) +BENCHMARK(Writer_Unbuffered) ->RangeMultiplier(4) ->RangeMultiplier(16) ->Range(1 << 8, 1 << 16) diff --git a/cpp/src/arrow/io/file-benchmark.cc b/cpp/src/arrow/io/file-benchmark.cc index 4557ff78ecd..74b92cbf3d6 100644 --- a/cpp/src/arrow/io/file-benchmark.cc +++ b/cpp/src/arrow/io/file-benchmark.cc @@ -155,7 +155,7 @@ static void BenchmarkStreamingWrites(benchmark::State& state, // This situation is irrealistic as the kernel likely doesn't // copy the data at all, so we only measure small writes. 
-static void BM_FileOutputStreamSmallWritesToNull( +static void FileOutputStreamSmallWritesToNull( benchmark::State& state) { // NOLINT non-const reference std::shared_ptr stream; ABORT_NOT_OK(io::FileOutputStream::Open(GetNullFile(), &stream)); @@ -163,7 +163,7 @@ static void BM_FileOutputStreamSmallWritesToNull( BenchmarkStreamingWrites(state, small_sizes, stream.get()); } -static void BM_BufferedOutputStreamSmallWritesToNull( +static void BufferedOutputStreamSmallWritesToNull( benchmark::State& state) { // NOLINT non-const reference std::shared_ptr file; ABORT_NOT_OK(io::FileOutputStream::Open(GetNullFile(), &file)); @@ -178,7 +178,7 @@ static void BM_BufferedOutputStreamSmallWritesToNull( // // This is slightly more realistic than the above -static void BM_FileOutputStreamSmallWritesToPipe( +static void FileOutputStreamSmallWritesToPipe( benchmark::State& state) { // NOLINT non-const reference std::shared_ptr stream; std::shared_ptr reader; @@ -187,7 +187,7 @@ static void BM_FileOutputStreamSmallWritesToPipe( BenchmarkStreamingWrites(state, small_sizes, stream.get(), reader.get()); } -static void BM_FileOutputStreamLargeWritesToPipe( +static void FileOutputStreamLargeWritesToPipe( benchmark::State& state) { // NOLINT non-const reference std::shared_ptr stream; std::shared_ptr reader; @@ -196,7 +196,7 @@ static void BM_FileOutputStreamLargeWritesToPipe( BenchmarkStreamingWrites(state, large_sizes, stream.get(), reader.get()); } -static void BM_BufferedOutputStreamSmallWritesToPipe( +static void BufferedOutputStreamSmallWritesToPipe( benchmark::State& state) { // NOLINT non-const reference std::shared_ptr stream; std::shared_ptr reader; @@ -208,7 +208,7 @@ static void BM_BufferedOutputStreamSmallWritesToPipe( BenchmarkStreamingWrites(state, small_sizes, buffered_stream.get(), reader.get()); } -static void BM_BufferedOutputStreamLargeWritesToPipe( +static void BufferedOutputStreamLargeWritesToPipe( benchmark::State& state) { // NOLINT non-const reference 
std::shared_ptr stream; std::shared_ptr reader; @@ -224,13 +224,13 @@ static void BM_BufferedOutputStreamLargeWritesToPipe( // We use real time as we don't want to count CPU time spent in the // BackgroundReader thread -BENCHMARK(BM_FileOutputStreamSmallWritesToNull)->UseRealTime(); -BENCHMARK(BM_FileOutputStreamSmallWritesToPipe)->UseRealTime(); -BENCHMARK(BM_FileOutputStreamLargeWritesToPipe)->UseRealTime(); +BENCHMARK(FileOutputStreamSmallWritesToNull)->UseRealTime(); +BENCHMARK(FileOutputStreamSmallWritesToPipe)->UseRealTime(); +BENCHMARK(FileOutputStreamLargeWritesToPipe)->UseRealTime(); -BENCHMARK(BM_BufferedOutputStreamSmallWritesToNull)->UseRealTime(); -BENCHMARK(BM_BufferedOutputStreamSmallWritesToPipe)->UseRealTime(); -BENCHMARK(BM_BufferedOutputStreamLargeWritesToPipe)->UseRealTime(); +BENCHMARK(BufferedOutputStreamSmallWritesToNull)->UseRealTime(); +BENCHMARK(BufferedOutputStreamSmallWritesToPipe)->UseRealTime(); +BENCHMARK(BufferedOutputStreamLargeWritesToPipe)->UseRealTime(); #endif // ifndef _WIN32 diff --git a/cpp/src/arrow/ipc/read-write-benchmark.cc b/cpp/src/arrow/ipc/read-write-benchmark.cc index 07ad2e40108..6f66f9c4926 100644 --- a/cpp/src/arrow/ipc/read-write-benchmark.cc +++ b/cpp/src/arrow/ipc/read-write-benchmark.cc @@ -47,7 +47,7 @@ std::shared_ptr MakeRecordBatch(int64_t total_size, int64_t num_fie return RecordBatch::Make(schema, length, arrays); } -static void BM_WriteRecordBatch(benchmark::State& state) { // NOLINT non-const reference +static void WriteRecordBatch(benchmark::State& state) { // NOLINT non-const reference // 1MB constexpr int64_t kTotalSize = 1 << 20; @@ -68,7 +68,7 @@ static void BM_WriteRecordBatch(benchmark::State& state) { // NOLINT non-const state.SetBytesProcessed(int64_t(state.iterations()) * kTotalSize); } -static void BM_ReadRecordBatch(benchmark::State& state) { // NOLINT non-const reference +static void ReadRecordBatch(benchmark::State& state) { // NOLINT non-const reference // 1MB constexpr int64_t 
kTotalSize = 1 << 20; @@ -99,7 +99,7 @@ static void BM_ReadRecordBatch(benchmark::State& state) { // NOLINT non-const r state.SetBytesProcessed(int64_t(state.iterations()) * kTotalSize); } -BENCHMARK(BM_WriteRecordBatch)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime(); -BENCHMARK(BM_ReadRecordBatch)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime(); +BENCHMARK(WriteRecordBatch)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime(); +BENCHMARK(ReadRecordBatch)->RangeMultiplier(4)->Range(1, 1 << 13)->UseRealTime(); } // namespace arrow diff --git a/cpp/src/arrow/util/compression-benchmark.cc b/cpp/src/arrow/util/compression-benchmark.cc index 2aefe19b165..8595ae5f80d 100644 --- a/cpp/src/arrow/util/compression-benchmark.cc +++ b/cpp/src/arrow/util/compression-benchmark.cc @@ -111,7 +111,7 @@ int64_t StreamingCompress(Codec* codec, const std::vector& data, return compressed_size; } -static void BM_StreamingCompression( +static void StreamingCompression( Compression::type compression, const std::vector& data, benchmark::State& state) { // NOLINT non-const reference std::unique_ptr codec; @@ -126,14 +126,14 @@ static void BM_StreamingCompression( } template -static void BM_StreamingCompression( +static void StreamingCompression( benchmark::State& state) { // NOLINT non-const reference auto data = MakeCompressibleData(8 * 1024 * 1024); // 8 MB - BM_StreamingCompression(COMPRESSION, data, state); + StreamingCompression(COMPRESSION, data, state); } -static void BM_StreamingDecompression( +static void StreamingDecompression( Compression::type compression, const std::vector& data, benchmark::State& state) { // NOLINT non-const reference std::unique_ptr codec; @@ -173,22 +173,22 @@ static void BM_StreamingDecompression( } template -static void BM_StreamingDecompression( +static void StreamingDecompression( benchmark::State& state) { // NOLINT non-const reference auto data = MakeCompressibleData(8 * 1024 * 1024); // 8 MB - BM_StreamingDecompression(COMPRESSION, data, 
state); + StreamingDecompression(COMPRESSION, data, state); } -BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::GZIP); -BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::BROTLI); -BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::ZSTD); -BENCHMARK_TEMPLATE(BM_StreamingCompression, Compression::LZ4); +BENCHMARK_TEMPLATE(StreamingCompression, Compression::GZIP); +BENCHMARK_TEMPLATE(StreamingCompression, Compression::BROTLI); +BENCHMARK_TEMPLATE(StreamingCompression, Compression::ZSTD); +BENCHMARK_TEMPLATE(StreamingCompression, Compression::LZ4); -BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::GZIP); -BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::BROTLI); -BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::ZSTD); -BENCHMARK_TEMPLATE(BM_StreamingDecompression, Compression::LZ4); +BENCHMARK_TEMPLATE(StreamingDecompression, Compression::GZIP); +BENCHMARK_TEMPLATE(StreamingDecompression, Compression::BROTLI); +BENCHMARK_TEMPLATE(StreamingDecompression, Compression::ZSTD); +BENCHMARK_TEMPLATE(StreamingDecompression, Compression::LZ4); } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/util/decimal-benchmark.cc b/cpp/src/arrow/util/decimal-benchmark.cc index b2e01e180f7..e5b1d2a3c4e 100644 --- a/cpp/src/arrow/util/decimal-benchmark.cc +++ b/cpp/src/arrow/util/decimal-benchmark.cc @@ -26,7 +26,7 @@ namespace arrow { namespace Decimal { -static void BM_FromString(benchmark::State& state) { // NOLINT non-const reference +static void FromString(benchmark::State& state) { // NOLINT non-const reference std::vector values = {"0", "1.23", "12.345e6", "-12.345e-6"}; while (state.KeepRunning()) { @@ -39,7 +39,7 @@ static void BM_FromString(benchmark::State& state) { // NOLINT non-const refere state.SetItemsProcessed(state.iterations() * values.size()); } -BENCHMARK(BM_FromString); +BENCHMARK(FromString); } // namespace Decimal } // namespace arrow diff --git a/cpp/src/arrow/util/hashing-benchmark.cc 
b/cpp/src/arrow/util/hashing-benchmark.cc index 3a0eb3c35c1..c7051d1a351 100644 --- a/cpp/src/arrow/util/hashing-benchmark.cc +++ b/cpp/src/arrow/util/hashing-benchmark.cc @@ -62,7 +62,7 @@ static std::vector MakeStrings(int32_t n_values, int32_t min_length return values; } -static void BM_HashIntegers(benchmark::State& state) { // NOLINT non-const reference +static void HashIntegers(benchmark::State& state) { // NOLINT non-const reference const std::vector values = MakeIntegers(10000); while (state.KeepRunning()) { @@ -96,17 +96,17 @@ static void BenchmarkStringHashing(benchmark::State& state, // NOLINT non-const state.SetItemsProcessed(2 * state.iterations() * values.size()); } -static void BM_HashSmallStrings(benchmark::State& state) { // NOLINT non-const reference +static void HashSmallStrings(benchmark::State& state) { // NOLINT non-const reference const std::vector values = MakeStrings(10000, 2, 20); BenchmarkStringHashing(state, values); } -static void BM_HashMediumStrings(benchmark::State& state) { // NOLINT non-const reference +static void HashMediumStrings(benchmark::State& state) { // NOLINT non-const reference const std::vector values = MakeStrings(10000, 20, 120); BenchmarkStringHashing(state, values); } -static void BM_HashLargeStrings(benchmark::State& state) { // NOLINT non-const reference +static void HashLargeStrings(benchmark::State& state) { // NOLINT non-const reference const std::vector values = MakeStrings(1000, 120, 2000); BenchmarkStringHashing(state, values); } @@ -114,10 +114,10 @@ static void BM_HashLargeStrings(benchmark::State& state) { // NOLINT non-const // ---------------------------------------------------------------------- // Benchmark declarations -BENCHMARK(BM_HashIntegers); -BENCHMARK(BM_HashSmallStrings); -BENCHMARK(BM_HashMediumStrings); -BENCHMARK(BM_HashLargeStrings); +BENCHMARK(HashIntegers); +BENCHMARK(HashSmallStrings); +BENCHMARK(HashMediumStrings); +BENCHMARK(HashLargeStrings); } // namespace internal } // namespace 
arrow diff --git a/cpp/src/arrow/util/int-util-benchmark.cc b/cpp/src/arrow/util/int-util-benchmark.cc index 37111da6338..89356969c27 100644 --- a/cpp/src/arrow/util/int-util-benchmark.cc +++ b/cpp/src/arrow/util/int-util-benchmark.cc @@ -49,7 +49,7 @@ std::vector GetValidBytes(int n_values) { return valid_bytes; } -static void BM_DetectUIntWidthNoNulls( +static void DetectUIntWidthNoNulls( benchmark::State& state) { // NOLINT non-const reference const auto values = GetUIntSequence(0x12345); @@ -60,7 +60,7 @@ static void BM_DetectUIntWidthNoNulls( state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t)); } -static void BM_DetectUIntWidthNulls( +static void DetectUIntWidthNulls( benchmark::State& state) { // NOLINT non-const reference const auto values = GetUIntSequence(0x12345); const auto valid_bytes = GetValidBytes(0x12345); @@ -73,7 +73,7 @@ static void BM_DetectUIntWidthNulls( state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t)); } -static void BM_DetectIntWidthNoNulls( +static void DetectIntWidthNoNulls( benchmark::State& state) { // NOLINT non-const reference const auto values = GetIntSequence(0x12345, -0x1234); @@ -84,7 +84,7 @@ static void BM_DetectIntWidthNoNulls( state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t)); } -static void BM_DetectIntWidthNulls( +static void DetectIntWidthNulls( benchmark::State& state) { // NOLINT non-const reference const auto values = GetIntSequence(0x12345, -0x1234); const auto valid_bytes = GetValidBytes(0x12345); @@ -97,10 +97,10 @@ static void BM_DetectIntWidthNulls( state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t)); } -BENCHMARK(BM_DetectUIntWidthNoNulls); -BENCHMARK(BM_DetectUIntWidthNulls); -BENCHMARK(BM_DetectIntWidthNoNulls); -BENCHMARK(BM_DetectIntWidthNulls); +BENCHMARK(DetectUIntWidthNoNulls); +BENCHMARK(DetectUIntWidthNulls); +BENCHMARK(DetectIntWidthNoNulls); +BENCHMARK(DetectIntWidthNulls); } // namespace 
internal } // namespace arrow diff --git a/cpp/src/arrow/util/lazy-benchmark.cc b/cpp/src/arrow/util/lazy-benchmark.cc index d00571b3040..ec39f1f873c 100644 --- a/cpp/src/arrow/util/lazy-benchmark.cc +++ b/cpp/src/arrow/util/lazy-benchmark.cc @@ -39,7 +39,7 @@ std::vector generate_junk(int64_t size) { } // Baseline -void BM_for_loop(benchmark::State& state) { +void for_loop(benchmark::State& state) { auto source = generate_junk(kSize); std::vector target(kSize); @@ -48,10 +48,10 @@ void BM_for_loop(benchmark::State& state) { } } -BENCHMARK(BM_for_loop); +BENCHMARK(for_loop); // For comparison: pure copy without any changes -void BM_std_copy(benchmark::State& state) { +void std_copy(benchmark::State& state) { auto source = generate_junk(kSize); std::vector target(kSize); @@ -60,10 +60,10 @@ void BM_std_copy(benchmark::State& state) { } } -BENCHMARK(BM_std_copy); +BENCHMARK(std_copy); // For comparison: pure copy with type convesion. -void BM_std_copy_converting(benchmark::State& state) { +void std_copy_converting(benchmark::State& state) { auto source = generate_junk(kSize); // bigger type to avoid warnings std::vector target(kSize); @@ -73,10 +73,10 @@ void BM_std_copy_converting(benchmark::State& state) { } } -BENCHMARK(BM_std_copy_converting); +BENCHMARK(std_copy_converting); // std::copy with a lazy range as a source -void BM_lazy_copy(benchmark::State& state) { +void lazy_copy(benchmark::State& state) { auto source = generate_junk(kSize); std::vector target(kSize); auto lazy_range = internal::MakeLazyRange( @@ -87,11 +87,11 @@ void BM_lazy_copy(benchmark::State& state) { } } -BENCHMARK(BM_lazy_copy); +BENCHMARK(lazy_copy); // std::copy with a lazy range which does static cast. 
// Should be the same performance as std::copy with differtly typed iterators -void BM_lazy_copy_converting(benchmark::State& state) { +void lazy_copy_converting(benchmark::State& state) { auto source = generate_junk(kSize); std::vector target(kSize); auto lazy_range = internal::MakeLazyRange( @@ -103,10 +103,10 @@ void BM_lazy_copy_converting(benchmark::State& state) { } } -BENCHMARK(BM_lazy_copy_converting); +BENCHMARK(lazy_copy_converting); // For loop with a post-increment of a lazy operator -void BM_lazy_postinc(benchmark::State& state) { +void lazy_postinc(benchmark::State& state) { auto source = generate_junk(kSize); std::vector target(kSize); auto lazy_range = internal::MakeLazyRange( @@ -121,7 +121,7 @@ void BM_lazy_postinc(benchmark::State& state) { } } -BENCHMARK(BM_lazy_postinc); +BENCHMARK(lazy_postinc); #endif // ARROW_WITH_BENCHMARKS_REFERENCE diff --git a/cpp/src/arrow/util/machine-benchmark.cc b/cpp/src/arrow/util/machine-benchmark.cc index e76fae2df50..67397444bd9 100644 --- a/cpp/src/arrow/util/machine-benchmark.cc +++ b/cpp/src/arrow/util/machine-benchmark.cc @@ -53,7 +53,7 @@ std::vector RandomPath(int32_t size) { } // Cache / main memory latency, depending on the working set size -static void BM_memory_latency(benchmark::State& state) { +static void memory_latency(benchmark::State& state) { const auto niters = static_cast(state.range(0)); const std::vector path = RandomPath(niters / 4); @@ -67,7 +67,7 @@ static void BM_memory_latency(benchmark::State& state) { state.SetItemsProcessed(state.iterations()); } -BENCHMARK(BM_memory_latency)->Repetitions(1)->RangeMultiplier(2)->Range(2 << 10, 2 << 24); +BENCHMARK(memory_latency)->Repetitions(1)->RangeMultiplier(2)->Range(2 << 10, 2 << 24); #endif // ARROW_WITH_BENCHMARKS_REFERENCE diff --git a/cpp/src/arrow/util/number-parsing-benchmark.cc b/cpp/src/arrow/util/number-parsing-benchmark.cc index d94011e35f0..e13ece90adc 100644 --- a/cpp/src/arrow/util/number-parsing-benchmark.cc +++ 
b/cpp/src/arrow/util/number-parsing-benchmark.cc @@ -69,7 +69,7 @@ static std::vector MakeTimestampStrings(int32_t num_items) { } template -static void BM_IntegerParsing(benchmark::State& state) { // NOLINT non-const reference +static void IntegerParsing(benchmark::State& state) { // NOLINT non-const reference auto strings = MakeIntStrings(1000); StringConverter converter; @@ -89,7 +89,7 @@ static void BM_IntegerParsing(benchmark::State& state) { // NOLINT non-const re } template -static void BM_FloatParsing(benchmark::State& state) { // NOLINT non-const reference +static void FloatParsing(benchmark::State& state) { // NOLINT non-const reference auto strings = MakeFloatStrings(1000); StringConverter converter; @@ -109,7 +109,7 @@ static void BM_FloatParsing(benchmark::State& state) { // NOLINT non-const refe } template -static void BM_TimestampParsing(benchmark::State& state) { // NOLINT non-const reference +static void TimestampParsing(benchmark::State& state) { // NOLINT non-const reference using c_type = TimestampType::c_type; auto strings = MakeTimestampStrings(1000); @@ -131,22 +131,22 @@ static void BM_TimestampParsing(benchmark::State& state) { // NOLINT non-const state.SetItemsProcessed(state.iterations() * strings.size()); } -BENCHMARK_TEMPLATE(BM_IntegerParsing, Int8Type); -BENCHMARK_TEMPLATE(BM_IntegerParsing, Int16Type); -BENCHMARK_TEMPLATE(BM_IntegerParsing, Int32Type); -BENCHMARK_TEMPLATE(BM_IntegerParsing, Int64Type); -BENCHMARK_TEMPLATE(BM_IntegerParsing, UInt8Type); -BENCHMARK_TEMPLATE(BM_IntegerParsing, UInt16Type); -BENCHMARK_TEMPLATE(BM_IntegerParsing, UInt32Type); -BENCHMARK_TEMPLATE(BM_IntegerParsing, UInt64Type); - -BENCHMARK_TEMPLATE(BM_FloatParsing, FloatType); -BENCHMARK_TEMPLATE(BM_FloatParsing, DoubleType); - -BENCHMARK_TEMPLATE(BM_TimestampParsing, TimeUnit::SECOND); -BENCHMARK_TEMPLATE(BM_TimestampParsing, TimeUnit::MILLI); -BENCHMARK_TEMPLATE(BM_TimestampParsing, TimeUnit::MICRO); -BENCHMARK_TEMPLATE(BM_TimestampParsing, 
TimeUnit::NANO); +BENCHMARK_TEMPLATE(IntegerParsing, Int8Type); +BENCHMARK_TEMPLATE(IntegerParsing, Int16Type); +BENCHMARK_TEMPLATE(IntegerParsing, Int32Type); +BENCHMARK_TEMPLATE(IntegerParsing, Int64Type); +BENCHMARK_TEMPLATE(IntegerParsing, UInt8Type); +BENCHMARK_TEMPLATE(IntegerParsing, UInt16Type); +BENCHMARK_TEMPLATE(IntegerParsing, UInt32Type); +BENCHMARK_TEMPLATE(IntegerParsing, UInt64Type); + +BENCHMARK_TEMPLATE(FloatParsing, FloatType); +BENCHMARK_TEMPLATE(FloatParsing, DoubleType); + +BENCHMARK_TEMPLATE(TimestampParsing, TimeUnit::SECOND); +BENCHMARK_TEMPLATE(TimestampParsing, TimeUnit::MILLI); +BENCHMARK_TEMPLATE(TimestampParsing, TimeUnit::MICRO); +BENCHMARK_TEMPLATE(TimestampParsing, TimeUnit::NANO); } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/thread-pool-benchmark.cc b/cpp/src/arrow/util/thread-pool-benchmark.cc index bcc88b83fda..56a492ea40f 100644 --- a/cpp/src/arrow/util/thread-pool-benchmark.cc +++ b/cpp/src/arrow/util/thread-pool-benchmark.cc @@ -75,8 +75,8 @@ struct Task { // This benchmark simply provides a baseline indicating the raw cost of our workload // depending on the workload size. Number of items / second in this (serial) -// benchmark can be compared to the numbers obtained in BM_ThreadPoolSpawn. -static void BM_WorkloadCost(benchmark::State& state) { +// benchmark can be compared to the numbers obtained in ThreadPoolSpawn. 
+static void WorkloadCost(benchmark::State& state) { const auto workload_size = static_cast(state.range(0)); Workload workload(workload_size); @@ -88,7 +88,7 @@ static void BM_WorkloadCost(benchmark::State& state) { } // Benchmark ThreadPool::Spawn -static void BM_ThreadPoolSpawn(benchmark::State& state) { +static void ThreadPoolSpawn(benchmark::State& state) { const auto nthreads = static_cast(state.range(0)); const auto workload_size = static_cast(state.range(1)); @@ -118,7 +118,7 @@ static void BM_ThreadPoolSpawn(benchmark::State& state) { } // Benchmark serial TaskGroup -static void BM_SerialTaskGroup(benchmark::State& state) { +static void SerialTaskGroup(benchmark::State& state) { const auto workload_size = static_cast(state.range(0)); Task task(workload_size); @@ -137,7 +137,7 @@ static void BM_SerialTaskGroup(benchmark::State& state) { } // Benchmark threaded TaskGroup -static void BM_ThreadedTaskGroup(benchmark::State& state) { +static void ThreadedTaskGroup(benchmark::State& state) { const auto nthreads = static_cast(state.range(0)); const auto workload_size = static_cast(state.range(1)); @@ -183,10 +183,10 @@ static void ThreadPoolSpawn_Customize(benchmark::internal::Benchmark* b) { b->Repetitions(1); } -BENCHMARK(BM_WorkloadCost)->Apply(WorkloadCost_Customize); -BENCHMARK(BM_SerialTaskGroup)->Apply(WorkloadCost_Customize); -BENCHMARK(BM_ThreadPoolSpawn)->Apply(ThreadPoolSpawn_Customize); -BENCHMARK(BM_ThreadedTaskGroup)->Apply(ThreadPoolSpawn_Customize); +BENCHMARK(WorkloadCost)->Apply(WorkloadCost_Customize); +BENCHMARK(SerialTaskGroup)->Apply(WorkloadCost_Customize); +BENCHMARK(ThreadPoolSpawn)->Apply(ThreadPoolSpawn_Customize); +BENCHMARK(ThreadedTaskGroup)->Apply(ThreadPoolSpawn_Customize); } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/trie-benchmark.cc b/cpp/src/arrow/util/trie-benchmark.cc index 7f21b8b958c..1ee2b3e3604 100644 --- a/cpp/src/arrow/util/trie-benchmark.cc +++ b/cpp/src/arrow/util/trie-benchmark.cc @@ 
-191,29 +191,29 @@ static void BenchmarkInlinedTrieLookups( state.SetItemsProcessed(state.iterations() * lookups.size()); } -static void BM_TrieLookupFound(benchmark::State& state) { // NOLINT non-const reference +static void TrieLookupFound(benchmark::State& state) { // NOLINT non-const reference BenchmarkTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"}); } -static void BM_TrieLookupNotFound( +static void TrieLookupNotFound( benchmark::State& state) { // NOLINT non-const reference BenchmarkTrieLookups(state, {"None", "1.0", "", "abc"}); } -static void BM_InlinedTrieLookupFound( +static void InlinedTrieLookupFound( benchmark::State& state) { // NOLINT non-const reference BenchmarkInlinedTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"}); } -static void BM_InlinedTrieLookupNotFound( +static void InlinedTrieLookupNotFound( benchmark::State& state) { // NOLINT non-const reference BenchmarkInlinedTrieLookups(state, {"None", "1.0", "", "abc"}); } -BENCHMARK(BM_TrieLookupFound); -BENCHMARK(BM_TrieLookupNotFound); -BENCHMARK(BM_InlinedTrieLookupFound); -BENCHMARK(BM_InlinedTrieLookupNotFound); +BENCHMARK(TrieLookupFound); +BENCHMARK(TrieLookupNotFound); +BENCHMARK(InlinedTrieLookupFound); +BENCHMARK(InlinedTrieLookupNotFound); } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/util/utf8-util-benchmark.cc b/cpp/src/arrow/util/utf8-util-benchmark.cc index dd6eec6de4a..51110d42515 100644 --- a/cpp/src/arrow/util/utf8-util-benchmark.cc +++ b/cpp/src/arrow/util/utf8-util-benchmark.cc @@ -70,55 +70,55 @@ static void BenchmarkUTF8Validation( state.SetBytesProcessed(state.iterations() * s.size()); } -static void BM_ValidateTinyAscii(benchmark::State& state) { // NOLINT non-const reference +static void ValidateTinyAscii(benchmark::State& state) { // NOLINT non-const reference BenchmarkUTF8Validation(state, tiny_valid_ascii, true); } -static void BM_ValidateTinyNonAscii( +static void ValidateTinyNonAscii( benchmark::State& state) { // NOLINT non-const 
reference BenchmarkUTF8Validation(state, tiny_valid_non_ascii, true); } -static void BM_ValidateSmallAscii( +static void ValidateSmallAscii( benchmark::State& state) { // NOLINT non-const reference BenchmarkUTF8Validation(state, valid_ascii, true); } -static void BM_ValidateSmallAlmostAscii( +static void ValidateSmallAlmostAscii( benchmark::State& state) { // NOLINT non-const reference BenchmarkUTF8Validation(state, valid_almost_ascii, true); } -static void BM_ValidateSmallNonAscii( +static void ValidateSmallNonAscii( benchmark::State& state) { // NOLINT non-const reference BenchmarkUTF8Validation(state, valid_non_ascii, true); } -static void BM_ValidateLargeAscii( +static void ValidateLargeAscii( benchmark::State& state) { // NOLINT non-const reference auto s = MakeLargeString(valid_ascii, 100000); BenchmarkUTF8Validation(state, s, true); } -static void BM_ValidateLargeAlmostAscii( +static void ValidateLargeAlmostAscii( benchmark::State& state) { // NOLINT non-const reference auto s = MakeLargeString(valid_almost_ascii, 100000); BenchmarkUTF8Validation(state, s, true); } -static void BM_ValidateLargeNonAscii( +static void ValidateLargeNonAscii( benchmark::State& state) { // NOLINT non-const reference auto s = MakeLargeString(valid_non_ascii, 100000); BenchmarkUTF8Validation(state, s, true); } -BENCHMARK(BM_ValidateTinyAscii); -BENCHMARK(BM_ValidateTinyNonAscii); -BENCHMARK(BM_ValidateSmallAscii); -BENCHMARK(BM_ValidateSmallAlmostAscii); -BENCHMARK(BM_ValidateSmallNonAscii); -BENCHMARK(BM_ValidateLargeAscii); -BENCHMARK(BM_ValidateLargeAlmostAscii); -BENCHMARK(BM_ValidateLargeNonAscii); +BENCHMARK(ValidateTinyAscii); +BENCHMARK(ValidateTinyNonAscii); +BENCHMARK(ValidateSmallAscii); +BENCHMARK(ValidateSmallAlmostAscii); +BENCHMARK(ValidateSmallNonAscii); +BENCHMARK(ValidateLargeAscii); +BENCHMARK(ValidateLargeAlmostAscii); +BENCHMARK(ValidateLargeNonAscii); } // namespace util } // namespace arrow From d8e779aabee8ad58c5dcd2d1bc32b3536d83c821 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Wed, 22 May 2019 13:52:57 -0400 Subject: [PATCH 11/14] Reformat --- cpp/src/arrow/builder-benchmark.cc | 3 +-- cpp/src/arrow/compute/compute-benchmark.cc | 5 ++--- cpp/src/arrow/csv/converter-benchmark.cc | 6 ++---- cpp/src/arrow/csv/parser-benchmark.cc | 12 ++++-------- cpp/src/arrow/gpu/cuda-benchmark.cc | 5 +---- cpp/src/arrow/util/compression-benchmark.cc | 11 +++++------ cpp/src/arrow/util/int-util-benchmark.cc | 6 ++---- cpp/src/arrow/util/trie-benchmark.cc | 3 +-- cpp/src/arrow/util/utf8-util-benchmark.cc | 12 +++++------- 9 files changed, 23 insertions(+), 40 deletions(-) diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc index c5e8093d72d..94870667726 100644 --- a/cpp/src/arrow/builder-benchmark.cc +++ b/cpp/src/arrow/builder-benchmark.cc @@ -140,8 +140,7 @@ static void BuildBooleanArrayNoNulls( state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void BuildBinaryArray( - benchmark::State& state) { // NOLINT non-const reference +static void BuildBinaryArray(benchmark::State& state) { // NOLINT non-const reference for (auto _ : state) { BinaryBuilder builder; diff --git a/cpp/src/arrow/compute/compute-benchmark.cc b/cpp/src/arrow/compute/compute-benchmark.cc index a3af8c57029..449504121f5 100644 --- a/cpp/src/arrow/compute/compute-benchmark.cc +++ b/cpp/src/arrow/compute/compute-benchmark.cc @@ -203,9 +203,8 @@ BENCHMARK(BuildStringDictionary); constexpr int kHashBenchmarkLength = 1 << 22; -#define ADD_HASH_ARGS(WHAT) \ - WHAT->Args({kHashBenchmarkLength, 1 << 10}) \ - ->Args({kHashBenchmarkLength, 10 * 1 << 10}) \ +#define ADD_HASH_ARGS(WHAT) \ + WHAT->Args({kHashBenchmarkLength, 1 << 10})->Args({kHashBenchmarkLength, 10 * 1 << 10}) ADD_HASH_ARGS(BENCHMARK(UniqueInt64NoNulls)); ADD_HASH_ARGS(BENCHMARK(UniqueInt64WithNulls)); diff --git a/cpp/src/arrow/csv/converter-benchmark.cc b/cpp/src/arrow/csv/converter-benchmark.cc index 
1db32140764..e128e7b3f8e 100644 --- a/cpp/src/arrow/csv/converter-benchmark.cc +++ b/cpp/src/arrow/csv/converter-benchmark.cc @@ -76,16 +76,14 @@ static void BenchmarkConversion(benchmark::State& state, // NOLINT non-const re constexpr size_t num_rows = 10000; -static void Int64Conversion( - benchmark::State& state) { // NOLINT non-const reference +static void Int64Conversion(benchmark::State& state) { // NOLINT non-const reference auto parser = BuildInt64Data(num_rows); auto options = ConvertOptions::Defaults(); BenchmarkConversion(state, *parser, int64(), options); } -static void FloatConversion( - benchmark::State& state) { // NOLINT non-const reference +static void FloatConversion(benchmark::State& state) { // NOLINT non-const reference auto parser = BuildFloatData(num_rows); auto options = ConvertOptions::Defaults(); diff --git a/cpp/src/arrow/csv/parser-benchmark.cc b/cpp/src/arrow/csv/parser-benchmark.cc index 31250f0fb8b..5c9af39d56f 100644 --- a/cpp/src/arrow/csv/parser-benchmark.cc +++ b/cpp/src/arrow/csv/parser-benchmark.cc @@ -56,8 +56,7 @@ static void BenchmarkCSVChunking(benchmark::State& state, // NOLINT non-const r state.SetBytesProcessed(state.iterations() * csv_size); } -static void ChunkCSVQuotedBlock( - benchmark::State& state) { // NOLINT non-const reference +static void ChunkCSVQuotedBlock(benchmark::State& state) { // NOLINT non-const reference auto csv = BuildCsvData(one_row, num_rows); auto options = ParseOptions::Defaults(); options.quoting = true; @@ -67,8 +66,7 @@ static void ChunkCSVQuotedBlock( BenchmarkCSVChunking(state, csv, options); } -static void ChunkCSVEscapedBlock( - benchmark::State& state) { // NOLINT non-const reference +static void ChunkCSVEscapedBlock(benchmark::State& state) { // NOLINT non-const reference auto csv = BuildCsvData(one_row_escaped, num_rows); auto options = ParseOptions::Defaults(); options.quoting = false; @@ -121,8 +119,7 @@ static void BenchmarkCSVParsing(benchmark::State& state, // NOLINT non-const re 
state.SetBytesProcessed(state.iterations() * csv_size); } -static void ParseCSVQuotedBlock( - benchmark::State& state) { // NOLINT non-const reference +static void ParseCSVQuotedBlock(benchmark::State& state) { // NOLINT non-const reference auto csv = BuildCsvData(one_row, num_rows); auto options = ParseOptions::Defaults(); options.quoting = true; @@ -131,8 +128,7 @@ static void ParseCSVQuotedBlock( BenchmarkCSVParsing(state, csv, num_rows, options); } -static void ParseCSVEscapedBlock( - benchmark::State& state) { // NOLINT non-const reference +static void ParseCSVEscapedBlock(benchmark::State& state) { // NOLINT non-const reference auto csv = BuildCsvData(one_row_escaped, num_rows); auto options = ParseOptions::Defaults(); options.quoting = false; diff --git a/cpp/src/arrow/gpu/cuda-benchmark.cc b/cpp/src/arrow/gpu/cuda-benchmark.cc index 96c3d0f6559..267d64a1776 100644 --- a/cpp/src/arrow/gpu/cuda-benchmark.cc +++ b/cpp/src/arrow/gpu/cuda-benchmark.cc @@ -82,10 +82,7 @@ static void Writer_Unbuffered(benchmark::State& state) { } // Vary chunk write size from 256 bytes to 64K -BENCHMARK(Writer_Buffered) - ->RangeMultiplier(16) - ->Range(1 << 8, 1 << 16) - ->UseRealTime(); +BENCHMARK(Writer_Buffered)->RangeMultiplier(16)->Range(1 << 8, 1 << 16)->UseRealTime(); BENCHMARK(Writer_Unbuffered) ->RangeMultiplier(4) diff --git a/cpp/src/arrow/util/compression-benchmark.cc b/cpp/src/arrow/util/compression-benchmark.cc index 8595ae5f80d..a69bb447d9d 100644 --- a/cpp/src/arrow/util/compression-benchmark.cc +++ b/cpp/src/arrow/util/compression-benchmark.cc @@ -111,9 +111,9 @@ int64_t StreamingCompress(Codec* codec, const std::vector& data, return compressed_size; } -static void StreamingCompression( - Compression::type compression, const std::vector& data, - benchmark::State& state) { // NOLINT non-const reference +static void StreamingCompression(Compression::type compression, + const std::vector& data, + benchmark::State& state) { // NOLINT non-const reference 
std::unique_ptr codec; ABORT_NOT_OK(Codec::Create(compression, &codec)); @@ -126,9 +126,8 @@ static void StreamingCompression( } template -static void StreamingCompression( - benchmark::State& state) { // NOLINT non-const reference - auto data = MakeCompressibleData(8 * 1024 * 1024); // 8 MB +static void StreamingCompression(benchmark::State& state) { // NOLINT non-const reference + auto data = MakeCompressibleData(8 * 1024 * 1024); // 8 MB StreamingCompression(COMPRESSION, data, state); } diff --git a/cpp/src/arrow/util/int-util-benchmark.cc b/cpp/src/arrow/util/int-util-benchmark.cc index 89356969c27..1b306ed946b 100644 --- a/cpp/src/arrow/util/int-util-benchmark.cc +++ b/cpp/src/arrow/util/int-util-benchmark.cc @@ -60,8 +60,7 @@ static void DetectUIntWidthNoNulls( state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t)); } -static void DetectUIntWidthNulls( - benchmark::State& state) { // NOLINT non-const reference +static void DetectUIntWidthNulls(benchmark::State& state) { // NOLINT non-const reference const auto values = GetUIntSequence(0x12345); const auto valid_bytes = GetValidBytes(0x12345); @@ -84,8 +83,7 @@ static void DetectIntWidthNoNulls( state.SetBytesProcessed(state.iterations() * values.size() * sizeof(uint64_t)); } -static void DetectIntWidthNulls( - benchmark::State& state) { // NOLINT non-const reference +static void DetectIntWidthNulls(benchmark::State& state) { // NOLINT non-const reference const auto values = GetIntSequence(0x12345, -0x1234); const auto valid_bytes = GetValidBytes(0x12345); diff --git a/cpp/src/arrow/util/trie-benchmark.cc b/cpp/src/arrow/util/trie-benchmark.cc index 1ee2b3e3604..33b88d023f2 100644 --- a/cpp/src/arrow/util/trie-benchmark.cc +++ b/cpp/src/arrow/util/trie-benchmark.cc @@ -195,8 +195,7 @@ static void TrieLookupFound(benchmark::State& state) { // NOLINT non-const refe BenchmarkTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"}); } -static void TrieLookupNotFound( - benchmark::State& state) 
{ // NOLINT non-const reference +static void TrieLookupNotFound(benchmark::State& state) { // NOLINT non-const reference BenchmarkTrieLookups(state, {"None", "1.0", "", "abc"}); } diff --git a/cpp/src/arrow/util/utf8-util-benchmark.cc b/cpp/src/arrow/util/utf8-util-benchmark.cc index 51110d42515..51f7b2ae050 100644 --- a/cpp/src/arrow/util/utf8-util-benchmark.cc +++ b/cpp/src/arrow/util/utf8-util-benchmark.cc @@ -74,14 +74,13 @@ static void ValidateTinyAscii(benchmark::State& state) { // NOLINT non-const re BenchmarkUTF8Validation(state, tiny_valid_ascii, true); } -static void ValidateTinyNonAscii( - benchmark::State& state) { // NOLINT non-const reference +static void ValidateTinyNonAscii(benchmark::State& state) { // NOLINT non-const reference BenchmarkUTF8Validation(state, tiny_valid_non_ascii, true); } -static void ValidateSmallAscii( - benchmark::State& state) { // NOLINT non-const reference - BenchmarkUTF8Validation(state, valid_ascii, true); } +static void ValidateSmallAscii(benchmark::State& state) { // NOLINT non-const reference + BenchmarkUTF8Validation(state, valid_ascii, true); +} static void ValidateSmallAlmostAscii( benchmark::State& state) { // NOLINT non-const reference @@ -93,8 +92,7 @@ static void ValidateSmallNonAscii( BenchmarkUTF8Validation(state, valid_non_ascii, true); } -static void ValidateLargeAscii( - benchmark::State& state) { // NOLINT non-const reference +static void ValidateLargeAscii(benchmark::State& state) { // NOLINT non-const reference auto s = MakeLargeString(valid_ascii, 100000); BenchmarkUTF8Validation(state, s, true); } From a9ecffd9810a8464077c24e4fa17fef5f7e8116f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Tue, 28 May 2019 11:27:41 -0400 Subject: [PATCH 12/14] address comments --- cpp/src/arrow/builder-benchmark.cc | 35 +++--- cpp/src/arrow/json/parser-benchmark.cc | 4 +- cpp/src/arrow/util/bit-util-benchmark.cc | 29 +++-- cpp/src/arrow/util/compression-benchmark.cc | 27 +++-- 
cpp/src/arrow/util/thread-pool-benchmark.cc | 37 ++++--- cpp/src/arrow/util/trie-benchmark.cc | 116 ++++++++++---------- dev/archery/archery/benchmark/google.py | 7 +- dev/archery/archery/benchmark/runner.py | 8 +- dev/archery/archery/cli.py | 21 ++-- 9 files changed, 152 insertions(+), 132 deletions(-) diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc index 94870667726..e13b75e73b9 100644 --- a/cpp/src/arrow/builder-benchmark.cc +++ b/cpp/src/arrow/builder-benchmark.cc @@ -47,22 +47,22 @@ static VectorType AlmostU8CompressibleVector() { return data; } -constexpr int64_t kFinalSize = 256; +constexpr int64_t kRounds = 256; static VectorType kData = AlmostU8CompressibleVector(); constexpr int64_t kBytesProcessPerRound = kNumberOfElements * sizeof(ValueType); -constexpr int64_t kBytesProcessed = kFinalSize * kBytesProcessPerRound; +constexpr int64_t kBytesProcessed = kRounds * kBytesProcessPerRound; static const char* kBinaryString = "12345678"; static arrow::util::string_view kBinaryView(kBinaryString); // This benchmarks acts as a reference to the native std::vector -// implementation. It appends kFinalSize chunks into a vector. +// implementation. It appends kRounds chunks into a vector. 
static void ReferenceBuildVectorNoNulls( benchmark::State& state) { // NOLINT non-const reference for (auto _ : state) { std::vector builder; - for (int i = 0; i < kFinalSize; i++) { + for (int i = 0; i < kRounds; i++) { builder.insert(builder.end(), kData.cbegin(), kData.cend()); } } @@ -70,12 +70,11 @@ static void ReferenceBuildVectorNoNulls( state.SetBytesProcessed(state.iterations() * kBytesProcessed); } -static void BuildPrimitiveArrayNoNulls( - benchmark::State& state) { // NOLINT non-const reference +static void BuildIntArrayNoNulls(benchmark::State& state) { // NOLINT non-const reference for (auto _ : state) { Int64Builder builder; - for (int i = 0; i < kFinalSize; i++) { + for (int i = 0; i < kRounds; i++) { ABORT_NOT_OK(builder.AppendValues(kData.data(), kData.size(), nullptr)); } @@ -91,7 +90,7 @@ static void BuildAdaptiveIntNoNulls( for (auto _ : state) { AdaptiveIntBuilder builder; - for (int i = 0; i < kFinalSize; i++) { + for (int i = 0; i < kRounds; i++) { ABORT_NOT_OK(builder.AppendValues(kData.data(), kData.size(), nullptr)); } @@ -107,7 +106,7 @@ static void BuildAdaptiveIntNoNullsScalarAppend( for (auto _ : state) { AdaptiveIntBuilder builder; - for (int i = 0; i < kFinalSize; i++) { + for (int i = 0; i < kRounds; i++) { for (size_t j = 0; j < kData.size(); j++) { ABORT_NOT_OK(builder.Append(kData[i])) } @@ -123,13 +122,13 @@ static void BuildAdaptiveIntNoNullsScalarAppend( static void BuildBooleanArrayNoNulls( benchmark::State& state) { // NOLINT non-const reference - size_t n_bytes = kData.size() * sizeof(ValueType); + size_t n_bytes = kBytesProcessPerRound; const uint8_t* data = reinterpret_cast(kData.data()); for (auto _ : state) { BooleanBuilder builder; - for (int i = 0; i < kFinalSize; i++) { + for (int i = 0; i < kRounds; i++) { ABORT_NOT_OK(builder.AppendValues(data, n_bytes)); } @@ -144,7 +143,7 @@ static void BuildBinaryArray(benchmark::State& state) { // NOLINT non-const ref for (auto _ : state) { BinaryBuilder builder; - for 
(int64_t i = 0; i < kFinalSize * kNumberOfElements; i++) { + for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) { ABORT_NOT_OK(builder.Append(kBinaryView)); } @@ -163,7 +162,7 @@ static void BuildChunkedBinaryArray( for (auto _ : state) { internal::ChunkedBinaryBuilder builder(kChunkSize); - for (int64_t i = 0; i < kFinalSize * kNumberOfElements; i++) { + for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) { ABORT_NOT_OK(builder.Append(kBinaryView)); } @@ -181,7 +180,7 @@ static void BuildFixedSizeBinaryArray( for (auto _ : state) { FixedSizeBinaryBuilder builder(type); - for (int64_t i = 0; i < kFinalSize * kNumberOfElements; i++) { + for (int64_t i = 0; i < kRounds * kNumberOfElements; i++) { ABORT_NOT_OK(builder.Append(kBinaryView)); } @@ -292,7 +291,7 @@ static void BenchmarkScalarDictionaryArray( for (auto _ : state) { DictionaryBuilder builder(default_memory_pool()); - for (int64_t i = 0; i < kFinalSize; i++) { + for (int64_t i = 0; i < kRounds; i++) { for (const auto value : fodder) { ABORT_NOT_OK(builder.Append(value)); } @@ -333,7 +332,7 @@ static void BuildStringDictionaryArray( for (auto _ : state) { BinaryDictionaryBuilder builder(default_memory_pool()); - for (int64_t i = 0; i < kFinalSize; i++) { + for (int64_t i = 0; i < kRounds; i++) { for (const auto& value : fodder) { ABORT_NOT_OK(builder.Append(value)); } @@ -343,7 +342,7 @@ static void BuildStringDictionaryArray( ABORT_NOT_OK(builder.Finish(&out)); } - state.SetBytesProcessed(state.iterations() * fodder_size * kFinalSize); + state.SetBytesProcessed(state.iterations() * fodder_size * kRounds); } static void ArrayDataConstructDestruct( @@ -371,7 +370,7 @@ BENCHMARK(ReferenceBuildVectorNoNulls); BENCHMARK(BuildBooleanArrayNoNulls); -BENCHMARK(BuildPrimitiveArrayNoNulls); +BENCHMARK(BuildIntArrayNoNulls); BENCHMARK(BuildAdaptiveIntNoNulls); BENCHMARK(BuildAdaptiveIntNoNullsScalarAppend); diff --git a/cpp/src/arrow/json/parser-benchmark.cc b/cpp/src/arrow/json/parser-benchmark.cc index 
40c357d819e..66ef9ece425 100644 --- a/cpp/src/arrow/json/parser-benchmark.cc +++ b/cpp/src/arrow/json/parser-benchmark.cc @@ -150,7 +150,7 @@ static void ReadJSONBlockWithSchemaSingleThread( BenchmarkReadJSONBlockWithSchema(state, false); } -static void ReferenceReadJSONBlockWithSchemaMultiThread( +static void ReadJSONBlockWithSchemaMultiThread( benchmark::State& state) { // NOLINT non-const reference BenchmarkReadJSONBlockWithSchema(state, true); } @@ -160,7 +160,7 @@ BENCHMARK(ChunkJSONLineDelimited); BENCHMARK(ParseJSONBlockWithSchema); BENCHMARK(ReadJSONBlockWithSchemaSingleThread); -BENCHMARK(ReferenceReadJSONBlockWithSchemaMultiThread)->UseRealTime(); +BENCHMARK(ReadJSONBlockWithSchemaMultiThread)->UseRealTime(); } // namespace json } // namespace arrow diff --git a/cpp/src/arrow/util/bit-util-benchmark.cc b/cpp/src/arrow/util/bit-util-benchmark.cc index 488a56a5ade..5131ceb88d1 100644 --- a/cpp/src/arrow/util/bit-util-benchmark.cc +++ b/cpp/src/arrow/util/bit-util-benchmark.cc @@ -30,6 +30,8 @@ namespace arrow { namespace BitUtil { +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE + // A naive bitmap reader implementation, meant as a baseline against // internal::BitmapReader @@ -82,6 +84,8 @@ class NaiveBitmapWriter { int64_t position_; }; +#endif + static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { std::shared_ptr buffer; ABORT_NOT_OK(AllocateBuffer(nbytes, &buffer)); @@ -174,18 +178,10 @@ static void BenchmarkGenerateBits(benchmark::State& state, int64_t nbytes) { state.SetBytesProcessed(state.iterations() * nbytes); } -static void ReferenceNaiveBitmapReader(benchmark::State& state) { - BenchmarkBitmapReader(state, state.range(0)); -} - static void BitmapReader(benchmark::State& state) { BenchmarkBitmapReader(state, state.range(0)); } -static void ReferenceNaiveBitmapWriter(benchmark::State& state) { - BenchmarkBitmapWriter(state, state.range(0)); -} - static void BitmapWriter(benchmark::State& state) { BenchmarkBitmapWriter(state, state.range(0)); } @@ 
-249,12 +245,25 @@ static void CopyBitmapWithOffset(benchmark::State& state) { // NOLINT non-const CopyBitmap<4>(state); } +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE + +static void ReferenceNaiveBitmapReader(benchmark::State& state) { + BenchmarkBitmapReader(state, state.range(0)); +} + +static void ReferenceNaiveBitmapWriter(benchmark::State& state) { + BenchmarkBitmapWriter(state, state.range(0)); +} + +BENCHMARK(ReferenceNaiveBitmapWriter)->Arg(kBufferSize); +BENCHMARK(ReferenceNaiveBitmapReader)->Arg(kBufferSize); + +#endif + BENCHMARK(CopyBitmapWithoutOffset)->Arg(kBufferSize); BENCHMARK(CopyBitmapWithOffset)->Arg(kBufferSize); -BENCHMARK(ReferenceNaiveBitmapReader)->Arg(kBufferSize); BENCHMARK(BitmapReader)->Arg(kBufferSize); -BENCHMARK(ReferenceNaiveBitmapWriter)->Arg(kBufferSize); BENCHMARK(BitmapWriter)->Arg(kBufferSize); BENCHMARK(FirstTimeBitmapWriter)->Arg(kBufferSize); diff --git a/cpp/src/arrow/util/compression-benchmark.cc b/cpp/src/arrow/util/compression-benchmark.cc index a69bb447d9d..28bc1255c4b 100644 --- a/cpp/src/arrow/util/compression-benchmark.cc +++ b/cpp/src/arrow/util/compression-benchmark.cc @@ -29,6 +29,8 @@ namespace arrow { namespace util { +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE + std::vector MakeCompressibleData(int data_size) { // XXX This isn't a real-world corpus so doesn't really represent the // comparative qualities of the algorithms @@ -126,8 +128,9 @@ static void StreamingCompression(Compression::type compression, } template -static void StreamingCompression(benchmark::State& state) { // NOLINT non-const reference - auto data = MakeCompressibleData(8 * 1024 * 1024); // 8 MB +static void ReferenceStreamingCompression( + benchmark::State& state) { // NOLINT non-const reference + auto data = MakeCompressibleData(8 * 1024 * 1024); // 8 MB StreamingCompression(COMPRESSION, data, state); } @@ -172,22 +175,24 @@ static void StreamingDecompression( } template -static void StreamingDecompression( +static void 
ReferenceStreamingDecompression( benchmark::State& state) { // NOLINT non-const reference auto data = MakeCompressibleData(8 * 1024 * 1024); // 8 MB StreamingDecompression(COMPRESSION, data, state); } -BENCHMARK_TEMPLATE(StreamingCompression, Compression::GZIP); -BENCHMARK_TEMPLATE(StreamingCompression, Compression::BROTLI); -BENCHMARK_TEMPLATE(StreamingCompression, Compression::ZSTD); -BENCHMARK_TEMPLATE(StreamingCompression, Compression::LZ4); +BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::GZIP); +BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::BROTLI); +BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::ZSTD); +BENCHMARK_TEMPLATE(ReferenceStreamingCompression, Compression::LZ4); + +BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::GZIP); +BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::BROTLI); +BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::ZSTD); +BENCHMARK_TEMPLATE(ReferenceStreamingDecompression, Compression::LZ4); -BENCHMARK_TEMPLATE(StreamingDecompression, Compression::GZIP); -BENCHMARK_TEMPLATE(StreamingDecompression, Compression::BROTLI); -BENCHMARK_TEMPLATE(StreamingDecompression, Compression::ZSTD); -BENCHMARK_TEMPLATE(StreamingDecompression, Compression::LZ4); +#endif } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/util/thread-pool-benchmark.cc b/cpp/src/arrow/util/thread-pool-benchmark.cc index 56a492ea40f..b10a5d194dc 100644 --- a/cpp/src/arrow/util/thread-pool-benchmark.cc +++ b/cpp/src/arrow/util/thread-pool-benchmark.cc @@ -73,20 +73,6 @@ struct Task { Workload workload_; }; -// This benchmark simply provides a baseline indicating the raw cost of our workload -// depending on the workload size. Number of items / second in this (serial) -// benchmark can be compared to the numbers obtained in ThreadPoolSpawn. 
-static void WorkloadCost(benchmark::State& state) { - const auto workload_size = static_cast(state.range(0)); - - Workload workload(workload_size); - for (auto _ : state) { - workload(); - } - - state.SetItemsProcessed(state.iterations()); -} - // Benchmark ThreadPool::Spawn static void ThreadPoolSpawn(benchmark::State& state) { const auto nthreads = static_cast(state.range(0)); @@ -169,7 +155,6 @@ static void WorkloadCost_Customize(benchmark::internal::Benchmark* b) { } b->ArgNames({"task_cost"}); b->UseRealTime(); - b->Repetitions(1); } static void ThreadPoolSpawn_Customize(benchmark::internal::Benchmark* b) { @@ -180,10 +165,28 @@ static void ThreadPoolSpawn_Customize(benchmark::internal::Benchmark* b) { } b->ArgNames({"threads", "task_cost"}); b->UseRealTime(); - b->Repetitions(1); } -BENCHMARK(WorkloadCost)->Apply(WorkloadCost_Customize); +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE + +// This benchmark simply provides a baseline indicating the raw cost of our workload +// depending on the workload size. Number of items / second in this (serial) +// benchmark can be compared to the numbers obtained in ThreadPoolSpawn. 
+static void ReferenceWorkloadCost(benchmark::State& state) { + const auto workload_size = static_cast(state.range(0)); + + Workload workload(workload_size); + for (auto _ : state) { + workload(); + } + + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(ReferenceWorkloadCost)->Apply(WorkloadCost_Customize); + +#endif + BENCHMARK(SerialTaskGroup)->Apply(WorkloadCost_Customize); BENCHMARK(ThreadPoolSpawn)->Apply(ThreadPoolSpawn_Customize); BENCHMARK(ThreadedTaskGroup)->Apply(ThreadPoolSpawn_Customize); diff --git a/cpp/src/arrow/util/trie-benchmark.cc b/cpp/src/arrow/util/trie-benchmark.cc index 33b88d023f2..868accc3744 100644 --- a/cpp/src/arrow/util/trie-benchmark.cc +++ b/cpp/src/arrow/util/trie-benchmark.cc @@ -28,6 +28,64 @@ namespace arrow { namespace internal { +std::vector AllNulls() { + return {"#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND", + "1.#QNAN", "N/A", "NA", "NULL", "NaN", "n/a", "nan", "null"}; +} + +Trie MakeNullsTrie() { + auto nulls = AllNulls(); + + TrieBuilder builder; + for (const auto& str : AllNulls()) { + ABORT_NOT_OK(builder.Append(str)); + } + return builder.Finish(); +} + +std::vector Expand(const std::vector& base, size_t n) { + std::vector result; + result.reserve(n); + + while (true) { + for (const auto& v : base) { + result.push_back(v); + if (result.size() == n) { + return result; + } + } + } +} + +static void BenchmarkTrieLookups(benchmark::State& state, // NOLINT non-const reference + const std::vector& strings) { + Trie trie = MakeNullsTrie(); + int32_t total = 0; + + auto lookups = Expand(strings, 100); + + for (auto _ : state) { + for (const auto& s : lookups) { + total += trie.Find(s); + } + } + benchmark::DoNotOptimize(total); + state.SetItemsProcessed(state.iterations() * lookups.size()); +} + +static void TrieLookupFound(benchmark::State& state) { // NOLINT non-const reference + BenchmarkTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"}); +} + +static void 
TrieLookupNotFound(benchmark::State& state) { // NOLINT non-const reference + BenchmarkTrieLookups(state, {"None", "1.0", "", "abc"}); +} + +BENCHMARK(TrieLookupFound); +BENCHMARK(TrieLookupNotFound); + +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE + static inline bool InlinedNullLookup(util::string_view s) { // An inlined version of trie lookup for a specific set of strings // (see AllNulls()) @@ -130,51 +188,6 @@ static inline bool InlinedNullLookup(util::string_view s) { } } -std::vector AllNulls() { - return {"#N/A", "#N/A N/A", "#NA", "-1.#IND", "-1.#QNAN", "-NaN", "-nan", "1.#IND", - "1.#QNAN", "N/A", "NA", "NULL", "NaN", "n/a", "nan", "null"}; -} - -Trie MakeNullsTrie() { - auto nulls = AllNulls(); - - TrieBuilder builder; - for (const auto& str : AllNulls()) { - ABORT_NOT_OK(builder.Append(str)); - } - return builder.Finish(); -} - -std::vector Expand(const std::vector& base, size_t n) { - std::vector result; - result.reserve(n); - - while (true) { - for (const auto& v : base) { - result.push_back(v); - if (result.size() == n) { - return result; - } - } - } -} - -static void BenchmarkTrieLookups(benchmark::State& state, // NOLINT non-const reference - const std::vector& strings) { - Trie trie = MakeNullsTrie(); - int32_t total = 0; - - auto lookups = Expand(strings, 100); - - for (auto _ : state) { - for (const auto& s : lookups) { - total += trie.Find(s); - } - } - benchmark::DoNotOptimize(total); - state.SetItemsProcessed(state.iterations() * lookups.size()); -} - static void BenchmarkInlinedTrieLookups( benchmark::State& state, // NOLINT non-const reference const std::vector& strings) { @@ -190,15 +203,6 @@ static void BenchmarkInlinedTrieLookups( benchmark::DoNotOptimize(total); state.SetItemsProcessed(state.iterations() * lookups.size()); } - -static void TrieLookupFound(benchmark::State& state) { // NOLINT non-const reference - BenchmarkTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"}); -} - -static void TrieLookupNotFound(benchmark::State& state) { // 
NOLINT non-const reference - BenchmarkTrieLookups(state, {"None", "1.0", "", "abc"}); -} - static void InlinedTrieLookupFound( benchmark::State& state) { // NOLINT non-const reference BenchmarkInlinedTrieLookups(state, {"N/A", "null", "-1.#IND", "N/A"}); @@ -209,10 +213,10 @@ static void InlinedTrieLookupNotFound( BenchmarkInlinedTrieLookups(state, {"None", "1.0", "", "abc"}); } -BENCHMARK(TrieLookupFound); -BENCHMARK(TrieLookupNotFound); BENCHMARK(InlinedTrieLookupFound); BENCHMARK(InlinedTrieLookupNotFound); +#endif + } // namespace internal } // namespace arrow diff --git a/dev/archery/archery/benchmark/google.py b/dev/archery/archery/benchmark/google.py index 8ef25b3de28..49e6ad1b05d 100644 --- a/dev/archery/archery/benchmark/google.py +++ b/dev/archery/archery/benchmark/google.py @@ -122,12 +122,7 @@ def time(self): @property def value(self): """ Return the benchmark value.""" - if self.bytes_per_second: - return self.bytes_per_second - elif self.items_per_second: - return self.items_per_second - else: - return self.time + return self.bytes_per_second or self.items_per_second or self.time @property def unit(self): diff --git a/dev/archery/archery/benchmark/runner.py b/dev/archery/archery/benchmark/runner.py index 099fa68c497..5cee7782c8b 100644 --- a/dev/archery/archery/benchmark/runner.py +++ b/dev/archery/archery/benchmark/runner.py @@ -85,6 +85,12 @@ def __init__(self, suites, **kwargs): self._suites = suites super().__init__(**kwargs) + @property + def list_benchmarks(self): + for suite in self._suites: + for benchmark in suite.benchmarks: + yield f"{suite.name}.{benchmark.name}" + @property def suites(self): suite_fn = regex_filter(self.suite_filter) @@ -147,7 +153,7 @@ def suite(self, name, suite_bin): return BenchmarkSuite(name, benchmarks) @property - def list(self): + def list_benchmarks(self): for suite_name, suite_bin in self.suites_binaries.items(): suite_cmd = GoogleBenchmarkCommand(suite_bin) for benchmark_name in suite_cmd.list_benchmarks(): 
diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 1d67624c3c5..e2dd9ea1c9b 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -186,24 +186,24 @@ def benchmark(ctx): @click.option("--cmake-extras", type=str, multiple=True, help="Extra flags/options to pass to cmake invocation. " "Can be stacked") -@click.argument("baseline", metavar="[]", default="WORKSPACE", +@click.argument("rev_or_path", metavar="[]", default="WORKSPACE", required=False) @click.pass_context -def benchmark_list(ctx, src, preserve, output, cmake_extras, baseline): +def benchmark_list(ctx, src, preserve, output, cmake_extras, rev_or_path): """ List benchmark suite. """ with tmpdir(preserve) as root: - logger.debug(f"Running benchmark {baseline}") + logger.debug(f"Running benchmark {rev_or_path}") conf = CppConfiguration( build_type="release", with_tests=True, with_benchmarks=True, with_python=False, cmake_extras=cmake_extras) runner_base = BenchmarkRunner.from_rev_or_path( - src, root, baseline, conf) + src, root, rev_or_path, conf) - for b in runner_base.list: - print(b, file=output) + for b in runner_base.list_benchmarks: + click.echo(b, file=output) @benchmark.command(name="run", short_help="Run benchmark suite") @@ -224,11 +224,11 @@ def benchmark_list(ctx, src, preserve, output, cmake_extras, baseline): @click.option("--cmake-extras", type=str, multiple=True, help="Extra flags/options to pass to cmake invocation. " "Can be stacked") -@click.argument("baseline", metavar="[]", default="WORKSPACE", +@click.argument("rev_or_path", metavar="[]", default="WORKSPACE", required=False) @click.pass_context def benchmark_run(ctx, src, preserve, suite_filter, benchmark_filter, - output, cmake_extras, baseline): + output, cmake_extras, rev_or_path): """ Run benchmark suite. This command will run the benchmark suite for a single build. 
This is @@ -237,7 +237,6 @@ def benchmark_run(ctx, src, preserve, suite_filter, benchmark_filter, The caller can optionally specify a target which is either a git revision (commit, tag, special values like HEAD) or a cmake build directory. - When a commit is referenced, a local clone of the arrow sources (specified via --src) is performed and the proper branch is created. This is done in a temporary directory which can be left intact with the `---preserve` flag. @@ -263,14 +262,14 @@ def benchmark_run(ctx, src, preserve, suite_filter, benchmark_filter, archery benchmark run --output=run.json """ with tmpdir(preserve) as root: - logger.debug(f"Running benchmark {baseline}") + logger.debug(f"Running benchmark {rev_or_path}") conf = CppConfiguration( build_type="release", with_tests=True, with_benchmarks=True, with_python=False, cmake_extras=cmake_extras) runner_base = BenchmarkRunner.from_rev_or_path( - src, root, baseline, conf, + src, root, rev_or_path, conf, suite_filter=suite_filter, benchmark_filter=benchmark_filter) json.dump(runner_base, output, cls=JsonEncoder) From 4a570ab90fd3e73c8834a72cafe3adaa302f4bad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Wed, 29 May 2019 09:25:28 -0400 Subject: [PATCH 13/14] Reformat --- cpp/cmake_modules/DefineOptions.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index 54844927bb1..8b3d80e04da 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -99,8 +99,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") # Reference benchmarks are used to compare to naive implementation, or # discover various hardware limits. define_option(ARROW_BUILD_BENCHMARKS_REFERENCE - "Build the Arrow micro reference benchmarks, default OFF." - OFF) + "Build the Arrow micro reference benchmarks, default OFF." 
OFF) define_option_string(ARROW_TEST_LINKAGE "Linkage of Arrow libraries with unit tests executables." From 83780304d63e17cdf0045c9463e47fb5b0a2e875 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fran=C3=A7ois=20Saint-Jacques?= Date: Thu, 30 May 2019 08:30:45 -0400 Subject: [PATCH 14/14] Address comments --- cpp/src/arrow/builder-benchmark.cc | 34 +++++++++++++++------------ cpp/src/arrow/csv/parser-benchmark.cc | 17 +++++++------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/cpp/src/arrow/builder-benchmark.cc b/cpp/src/arrow/builder-benchmark.cc index e13b75e73b9..bc36970f048 100644 --- a/cpp/src/arrow/builder-benchmark.cc +++ b/cpp/src/arrow/builder-benchmark.cc @@ -55,21 +55,6 @@ constexpr int64_t kBytesProcessed = kRounds * kBytesProcessPerRound; static const char* kBinaryString = "12345678"; static arrow::util::string_view kBinaryView(kBinaryString); -// This benchmarks acts as a reference to the native std::vector -// implementation. It appends kRounds chunks into a vector. -static void ReferenceBuildVectorNoNulls( - benchmark::State& state) { // NOLINT non-const reference - for (auto _ : state) { - std::vector builder; - - for (int i = 0; i < kRounds; i++) { - builder.insert(builder.end(), kData.cbegin(), kData.cend()); - } - } - - state.SetBytesProcessed(state.iterations() * kBytesProcessed); -} - static void BuildIntArrayNoNulls(benchmark::State& state) { // NOLINT non-const reference for (auto _ : state) { Int64Builder builder; @@ -366,8 +351,27 @@ static void ArrayDataConstructDestruct( // Benchmark declarations // +#ifdef ARROW_WITH_BENCHMARKS_REFERENCE + +// This benchmarks acts as a reference to the native std::vector +// implementation. It appends kRounds chunks into a vector. 
+static void ReferenceBuildVectorNoNulls( + benchmark::State& state) { // NOLINT non-const reference + for (auto _ : state) { + std::vector builder; + + for (int i = 0; i < kRounds; i++) { + builder.insert(builder.end(), kData.cbegin(), kData.cend()); + } + } + + state.SetBytesProcessed(state.iterations() * kBytesProcessed); +} + BENCHMARK(ReferenceBuildVectorNoNulls); +#endif + BENCHMARK(BuildBooleanArrayNoNulls); BENCHMARK(BuildIntArrayNoNulls); diff --git a/cpp/src/arrow/csv/parser-benchmark.cc b/cpp/src/arrow/csv/parser-benchmark.cc index 5c9af39d56f..c474af5c97a 100644 --- a/cpp/src/arrow/csv/parser-benchmark.cc +++ b/cpp/src/arrow/csv/parser-benchmark.cc @@ -28,13 +28,14 @@ namespace arrow { namespace csv { -// Can't have static str. +// Linter stipulates: +// >> For a static/global string constant, use a C style string instead const char* one_row = "abc,\"d,f\",12.34,\n"; const char* one_row_escaped = "abc,d\\,f,12.34,\n"; -size_t num_rows = (1024 * 8) / strlen(one_row); +size_t num_rows = (1024 * 64) / strlen(one_row); -static std::string BuildCsvData(const std::string& row, size_t repeat) { +static std::string BuildCSVData(const std::string& row, size_t repeat) { std::stringstream ss; for (size_t i = 0; i < repeat; ++i) { ss << row; @@ -57,7 +58,7 @@ static void BenchmarkCSVChunking(benchmark::State& state, // NOLINT non-const r } static void ChunkCSVQuotedBlock(benchmark::State& state) { // NOLINT non-const reference - auto csv = BuildCsvData(one_row, num_rows); + auto csv = BuildCSVData(one_row, num_rows); auto options = ParseOptions::Defaults(); options.quoting = true; options.escaping = false; @@ -67,7 +68,7 @@ static void ChunkCSVQuotedBlock(benchmark::State& state) { // NOLINT non-const } static void ChunkCSVEscapedBlock(benchmark::State& state) { // NOLINT non-const reference - auto csv = BuildCsvData(one_row_escaped, num_rows); + auto csv = BuildCSVData(one_row_escaped, num_rows); auto options = ParseOptions::Defaults(); options.quoting = false; 
options.escaping = true; @@ -78,7 +79,7 @@ static void ChunkCSVEscapedBlock(benchmark::State& state) { // NOLINT non-const static void ChunkCSVNoNewlinesBlock( benchmark::State& state) { // NOLINT non-const reference - auto csv = BuildCsvData(one_row_escaped, num_rows); + auto csv = BuildCSVData(one_row_escaped, num_rows); auto options = ParseOptions::Defaults(); options.quoting = true; options.escaping = false; @@ -120,7 +121,7 @@ static void BenchmarkCSVParsing(benchmark::State& state, // NOLINT non-const re } static void ParseCSVQuotedBlock(benchmark::State& state) { // NOLINT non-const reference - auto csv = BuildCsvData(one_row, num_rows); + auto csv = BuildCSVData(one_row, num_rows); auto options = ParseOptions::Defaults(); options.quoting = true; options.escaping = false; @@ -129,7 +130,7 @@ static void ParseCSVQuotedBlock(benchmark::State& state) { // NOLINT non-const } static void ParseCSVEscapedBlock(benchmark::State& state) { // NOLINT non-const reference - auto csv = BuildCsvData(one_row_escaped, num_rows); + auto csv = BuildCSVData(one_row_escaped, num_rows); auto options = ParseOptions::Defaults(); options.quoting = false; options.escaping = true;