From f7b36194977ddb99348a707a283710ce9c4fb134 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Tue, 28 Nov 2017 18:22:07 -0500
Subject: [PATCH 1/3] Add initial Unique benchmarks for int64, strings

Change-Id: I0c2eb14f1cd8c63a79fe2a3da308c76ac19a7384
---
 cpp/src/arrow/compute/compute-benchmark.cc | 127 ++++++++++++++++++++-
 cpp/src/arrow/compute/compute-test.cc      |   4 +-
 cpp/src/arrow/compute/kernels/hash.cc      |   2 +-
 3 files changed, 128 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/compute/compute-benchmark.cc b/cpp/src/arrow/compute/compute-benchmark.cc
index 974fffcd6d9..ee8df12609f 100644
--- a/cpp/src/arrow/compute/compute-benchmark.cc
+++ b/cpp/src/arrow/compute/compute-benchmark.cc
@@ -81,8 +81,131 @@ static void BM_BuildStringDictionary(
   state.SetBytesProcessed(state.iterations() * total_bytes);
 }
 
-BENCHMARK(BM_BuildDictionary)->Repetitions(3)->Unit(benchmark::kMicrosecond);
-BENCHMARK(BM_BuildStringDictionary)->Repetitions(3)->Unit(benchmark::kMicrosecond);
+template <typename Type>
+struct HashParams {
+  using T = typename Type::c_type;
+
+  double null_percent;
+
+  void GenerateTestData(const int64_t length, const int64_t num_unique,
+                        std::shared_ptr<Array>* arr) const {
+    std::vector<int64_t> draws;
+    std::vector<T> values;
+    std::vector<bool> is_valid;
+    test::randint(length, 0, num_unique, &draws);
+    for (int64_t draw : draws) {
+      values.push_back(draw);
+    }
+
+    if (this->null_percent > 0) {
+      test::random_is_valid(length, this->null_percent, &is_valid);
+      ArrayFromVector<Type, T>(is_valid, values, arr);
+    } else {
+      ArrayFromVector<Type, T>(values, arr);
+    }
+  }
+
+  int64_t GetBytesProcessed(int64_t length) const { return length * sizeof(T); }
+};
+
+template <>
+struct HashParams<StringType> {
+  double null_percent;
+  int32_t byte_width;
+
+  void GenerateTestData(const int64_t length, const int64_t num_unique,
+                        std::shared_ptr<Array>* arr) const {
+    std::vector<int64_t> draws;
+    test::randint(length, 0, num_unique, &draws);
+
+    const int64_t total_bytes = this->byte_width * num_unique;
+    std::vector<uint8_t> uniques(total_bytes);
+    const uint32_t seed = 0;
+    test::random_bytes(total_bytes, seed, uniques.data());
+
+    std::vector<bool> is_valid;
+    if (this->null_percent > 0) {
+      test::random_is_valid(length, this->null_percent, &is_valid);
+    }
+
+    StringBuilder builder;
+    for (int64_t i = 0; i < length; ++i) {
+      if (this->null_percent == 0 || is_valid[i]) {
+        ABORT_NOT_OK(builder.Append(uniques.data() + this->byte_width * draws[i],
+                                    this->byte_width));
+      } else {
+        ABORT_NOT_OK(builder.AppendNull());
+      }
+    }
+    ABORT_NOT_OK(builder.Finish(arr));
+  }
+
+  int64_t GetBytesProcessed(int64_t length) const { return length * byte_width; }
+};
+
+template <typename ParamType>
+void BenchUnique(benchmark::State& state, const ParamType& params, int64_t length,
+                 int64_t num_unique) {
+  std::shared_ptr<Array> arr;
+  params.GenerateTestData(length, num_unique, &arr);
+
+  FunctionContext ctx;
+  while (state.KeepRunning()) {
+    std::shared_ptr<Array> out;
+    ABORT_NOT_OK(Unique(&ctx, Datum(arr), &out));
+  }
+  state.SetBytesProcessed(params.GetBytesProcessed(length));
+}
+
+template <typename ParamType>
+void BenchDictionaryEncode(benchmark::State& state, const ParamType& params,
+                           int64_t length, int64_t num_unique) {
+  std::shared_ptr<Array> arr;
+  params.GenerateTestData(length, num_unique, &arr);
+
+  FunctionContext ctx;
+  while (state.KeepRunning()) {
+    Datum out;
+    ABORT_NOT_OK(DictionaryEncode(&ctx, Datum(arr), &out));
+  }
+  state.SetBytesProcessed(params.GetBytesProcessed(length));
+}
+
+static void BM_UniqueInt64NoNulls(benchmark::State& state) {
+  BenchUnique(state, HashParams<Int64Type>{0}, state.range(0), state.range(1));
+}
+
+static void BM_UniqueInt64WithNulls(benchmark::State& state) {
+  BenchUnique(state, HashParams<Int64Type>{0.05}, state.range(0), state.range(1));
+}
+
+static void BM_UniqueString10bytes(benchmark::State& state) {
+  // Byte strings with 10 bytes each
+  BenchUnique(state, HashParams<StringType>{0.05, 10}, state.range(0), state.range(1));
+}
+
+static void BM_UniqueString100bytes(benchmark::State& state) {
+  // Byte strings with 100 bytes each
+  BenchUnique(state, HashParams<StringType>{0.05, 100}, state.range(0), state.range(1));
+}
+
+BENCHMARK(BM_BuildDictionary)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_BuildStringDictionary)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
+constexpr int64_t kHashBenchmarkLength = 1 << 24;
+
+#define ADD_HASH_ARGS(WHAT)                        \
+  WHAT->Args({kHashBenchmarkLength, 50})           \
+      ->Args({kHashBenchmarkLength, 1 << 10})      \
+      ->Args({kHashBenchmarkLength, 10 * 1 << 10}) \
+      ->Args({kHashBenchmarkLength, 1 << 20})      \
+      ->MinTime(1.0)                               \
+      ->Unit(benchmark::kMicrosecond)              \
+      ->UseRealTime()
+
+ADD_HASH_ARGS(BENCHMARK(BM_UniqueInt64NoNulls));
+ADD_HASH_ARGS(BENCHMARK(BM_UniqueInt64WithNulls));
+ADD_HASH_ARGS(BENCHMARK(BM_UniqueString10bytes));
+ADD_HASH_ARGS(BENCHMARK(BM_UniqueString100bytes));
 
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index c73bfa309fd..84af8f7c6b0 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -869,8 +869,8 @@ TYPED_TEST(TestHashKernelPrimitive, PrimitiveResizeTable) {
     return;
   }
 
-  const int64_t kTotalValues = 10000;
-  const int64_t kRepeats = 10;
+  const int64_t kTotalValues = 1000000;
+  const int64_t kRepeats = 5;
 
   vector<T> values;
   vector<T> uniques;
diff --git a/cpp/src/arrow/compute/kernels/hash.cc b/cpp/src/arrow/compute/kernels/hash.cc
index 66c907369e3..e46c216921a 100644
--- a/cpp/src/arrow/compute/kernels/hash.cc
+++ b/cpp/src/arrow/compute/kernels/hash.cc
@@ -260,7 +260,7 @@ struct HashDictionary> {
       COMPUTE_HASH;                                       \
       while (kHashSlotEmpty != new_hash_slots[j]) {       \
         ++j;                                              \
-        if (ARROW_PREDICT_FALSE(j == hash_table_size_)) { \
+        if (ARROW_PREDICT_FALSE(j == new_size)) {         \
          j = 0;                                           \
        }                                                  \
      }                                                    \
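For readers unfamiliar with the Google Benchmark conventions used in the patch above: each Args({kHashBenchmarkLength, ...}) pair registered through ADD_HASH_ARGS arrives in the benchmark body as state.range(0) and state.range(1), and SetBytesProcessed turns the timed loop into a bytes-per-second figure in the report. Below is a minimal stand-alone sketch of the same pattern; the benchmark name, argument values, and summing workload are illustrative only and are not part of the patch.

#include <cstdint>
#include <numeric>
#include <vector>

#include "benchmark/benchmark.h"

// Illustrative only: mirrors how the Unique benchmarks above consume their
// registered arguments and report throughput.
static void BM_IllustrateArgs(benchmark::State& state) {
  const int64_t length = state.range(0);      // first value in Args({...})
  const int64_t num_unique = state.range(1);  // second value in Args({...})
  std::vector<int64_t> values(static_cast<size_t>(length));
  std::iota(values.begin(), values.end(), num_unique);

  while (state.KeepRunning()) {
    int64_t sum = std::accumulate(values.begin(), values.end(), int64_t(0));
    benchmark::DoNotOptimize(sum);
  }
  // Cumulative bytes over all iterations, so the report shows a rate.
  state.SetBytesProcessed(state.iterations() * length *
                          static_cast<int64_t>(sizeof(int64_t)));
}

BENCHMARK(BM_IllustrateArgs)->Args({1 << 20, 50})->Unit(benchmark::kMicrosecond);
BENCHMARK_MAIN();

With the standard Google Benchmark runner flags, the real cases above can then be selected at run time with, for example, --benchmark_filter=Unique.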
From 2885c6457cfbb46e7d80f0104dd3190e08cae560 Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Tue, 28 Nov 2017 18:35:43 -0500
Subject: [PATCH 2/3] Multiply bytes processed by state.iterations()

Change-Id: I0ff68d4f28fc8ccf7b2f4f5a4b8cc7e5c03aa717
---
 cpp/src/arrow/compute/compute-benchmark.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/compute/compute-benchmark.cc b/cpp/src/arrow/compute/compute-benchmark.cc
index ee8df12609f..aa7d899c864 100644
--- a/cpp/src/arrow/compute/compute-benchmark.cc
+++ b/cpp/src/arrow/compute/compute-benchmark.cc
@@ -153,7 +153,7 @@ void BenchUnique(benchmark::State& state, const ParamType& params, int64_t lengt
     std::shared_ptr<Array> out;
     ABORT_NOT_OK(Unique(&ctx, Datum(arr), &out));
   }
-  state.SetBytesProcessed(params.GetBytesProcessed(length));
+  state.SetBytesProcessed(state.iterations() * params.GetBytesProcessed(length));
 }
 
 template <typename ParamType>
@@ -167,7 +167,7 @@ void BenchDictionaryEncode(benchmark::State& state, const ParamType& params,
     Datum out;
     ABORT_NOT_OK(DictionaryEncode(&ctx, Datum(arr), &out));
   }
-  state.SetBytesProcessed(params.GetBytesProcessed(length));
+  state.SetBytesProcessed(state.iterations() * params.GetBytesProcessed(length));
 }
 
 static void BM_UniqueInt64NoNulls(benchmark::State& state) {
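Context for the change above: Google Benchmark divides the value passed to SetBytesProcessed by the total wall time of the measurement loop, so it must be the cumulative byte count over all iterations rather than the per-iteration count. As a rough worked example with the sizes used here, one iteration over kHashBenchmarkLength = 1 << 24 int64 values touches 2^24 * 8 bytes = 128 MiB; if the timer ends up running the loop 20 times, the counter should read about 2.5 GiB so that the reported bytes/second reflects per-iteration throughput instead of understating it by a factor of 20.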
From 638f1a116d5dbd8b9fbae9f4b90a3d2f9484dc4d Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Tue, 28 Nov 2017 18:40:58 -0500
Subject: [PATCH 3/3] Decrease resize load factor to 0.5

Change-Id: I3e32cf5542c1eb6173eb47d624d43893008ee0ee
---
 cpp/src/arrow/compute/kernels/hash.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/kernels/hash.cc b/cpp/src/arrow/compute/kernels/hash.cc
index e46c216921a..750f1d36ac7 100644
--- a/cpp/src/arrow/compute/kernels/hash.cc
+++ b/cpp/src/arrow/compute/kernels/hash.cc
@@ -43,7 +43,7 @@ typedef int32_t hash_slot_t;
 static constexpr hash_slot_t kHashSlotEmpty = std::numeric_limits<int32_t>::max();
 
 // The maximum load factor for the hash table before resizing.
-static constexpr double kMaxHashTableLoad = 0.7;
+static constexpr double kMaxHashTableLoad = 0.5;
 
 enum class SIMDMode : char { NOSIMD, SSE4, AVX2 };
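Context for the 0.7 to 0.5 change above: the kernel's hash table resolves collisions by linear probing (the "++j; if (j == new_size) j = 0;" walk in the resize macro touched by patch 1), and the classical open-addressing estimates put the expected probes per successful lookup at about (1 + 1/(1 - a)) / 2 and per unsuccessful lookup at about (1 + 1/(1 - a)^2) / 2, where a is the load factor. Lowering the resize threshold from a = 0.7 to a = 0.5 cuts those estimates from roughly 2.2 and 6.1 probes to 1.5 and 2.5, at the cost of roughly 40% more table slots for the same number of distinct values.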