diff --git a/cpp/src/arrow/compute/compute-benchmark.cc b/cpp/src/arrow/compute/compute-benchmark.cc
index 974fffcd6d9..aa7d899c864 100644
--- a/cpp/src/arrow/compute/compute-benchmark.cc
+++ b/cpp/src/arrow/compute/compute-benchmark.cc
@@ -81,8 +81,131 @@ static void BM_BuildStringDictionary(
   state.SetBytesProcessed(state.iterations() * total_bytes);
 }
 
-BENCHMARK(BM_BuildDictionary)->Repetitions(3)->Unit(benchmark::kMicrosecond);
-BENCHMARK(BM_BuildStringDictionary)->Repetitions(3)->Unit(benchmark::kMicrosecond);
+template <typename Type>
+struct HashParams {
+  using T = typename Type::c_type;
+
+  double null_percent;
+
+  void GenerateTestData(const int64_t length, const int64_t num_unique,
+                        std::shared_ptr<Array>* arr) const {
+    std::vector<int64_t> draws;
+    std::vector<T> values;
+    std::vector<bool> is_valid;
+    test::randint<int64_t>(length, 0, num_unique, &draws);
+    for (int64_t draw : draws) {
+      values.push_back(draw);
+    }
+
+    if (this->null_percent > 0) {
+      test::random_is_valid(length, this->null_percent, &is_valid);
+      ArrayFromVector<Type, T>(is_valid, values, arr);
+    } else {
+      ArrayFromVector<Type, T>(values, arr);
+    }
+  }
+
+  int64_t GetBytesProcessed(int64_t length) const { return length * sizeof(T); }
+};
+
+template <>
+struct HashParams<StringType> {
+  double null_percent;
+  int32_t byte_width;
+
+  void GenerateTestData(const int64_t length, const int64_t num_unique,
+                        std::shared_ptr<Array>* arr) const {
+    std::vector<int64_t> draws;
+    test::randint<int64_t>(length, 0, num_unique, &draws);
+
+    const int64_t total_bytes = this->byte_width * num_unique;
+    std::vector<uint8_t> uniques(total_bytes);
+    const uint32_t seed = 0;
+    test::random_bytes(total_bytes, seed, uniques.data());
+
+    std::vector<bool> is_valid;
+    if (this->null_percent > 0) {
+      test::random_is_valid(length, this->null_percent, &is_valid);
+    }
+
+    StringBuilder builder;
+    for (int64_t i = 0; i < length; ++i) {
+      if (this->null_percent == 0 || is_valid[i]) {
+        ABORT_NOT_OK(builder.Append(uniques.data() + this->byte_width * draws[i],
+                                    this->byte_width));
+      } else {
+        ABORT_NOT_OK(builder.AppendNull());
+      }
+    }
+    ABORT_NOT_OK(builder.Finish(arr));
+  }
+
+  int64_t GetBytesProcessed(int64_t length) const { return length * byte_width; }
+};
+
+template <typename ParamType>
+void BenchUnique(benchmark::State& state, const ParamType& params, int64_t length,
+                 int64_t num_unique) {
+  std::shared_ptr<Array> arr;
+  params.GenerateTestData(length, num_unique, &arr);
+
+  FunctionContext ctx;
+  while (state.KeepRunning()) {
+    std::shared_ptr<Array> out;
+    ABORT_NOT_OK(Unique(&ctx, Datum(arr), &out));
+  }
+  state.SetBytesProcessed(state.iterations() * params.GetBytesProcessed(length));
+}
+
+template <typename ParamType>
+void BenchDictionaryEncode(benchmark::State& state, const ParamType& params,
+                           int64_t length, int64_t num_unique) {
+  std::shared_ptr<Array> arr;
+  params.GenerateTestData(length, num_unique, &arr);
+
+  FunctionContext ctx;
+  while (state.KeepRunning()) {
+    Datum out;
+    ABORT_NOT_OK(DictionaryEncode(&ctx, Datum(arr), &out));
+  }
+  state.SetBytesProcessed(state.iterations() * params.GetBytesProcessed(length));
+}
+
+static void BM_UniqueInt64NoNulls(benchmark::State& state) {
+  BenchUnique(state, HashParams<Int64Type>{0}, state.range(0), state.range(1));
+}
+
+static void BM_UniqueInt64WithNulls(benchmark::State& state) {
+  BenchUnique(state, HashParams<Int64Type>{0.05}, state.range(0), state.range(1));
+}
+
+static void BM_UniqueString10bytes(benchmark::State& state) {
+  // Byte strings with 10 bytes each
+  BenchUnique(state, HashParams<StringType>{0.05, 10}, state.range(0), state.range(1));
+}
+
+static void BM_UniqueString100bytes(benchmark::State& state) {
+  // Byte strings with 100 bytes each
+  BenchUnique(state, HashParams<StringType>{0.05, 100}, state.range(0), state.range(1));
+}
+
+BENCHMARK(BM_BuildDictionary)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_BuildStringDictionary)->MinTime(1.0)->Unit(benchmark::kMicrosecond);
+
+constexpr int64_t kHashBenchmarkLength = 1 << 24;
+
+#define ADD_HASH_ARGS(WHAT)                          \
+  WHAT->Args({kHashBenchmarkLength, 50})             \
+      ->Args({kHashBenchmarkLength, 1 << 10})        \
+      ->Args({kHashBenchmarkLength, 10 * 1 << 10})   \
+      ->Args({kHashBenchmarkLength, 1 << 20})        \
+      ->MinTime(1.0)                                 \
+      ->Unit(benchmark::kMicrosecond)                \
+      ->UseRealTime()
+
+ADD_HASH_ARGS(BENCHMARK(BM_UniqueInt64NoNulls));
+ADD_HASH_ARGS(BENCHMARK(BM_UniqueInt64WithNulls));
+ADD_HASH_ARGS(BENCHMARK(BM_UniqueString10bytes));
+ADD_HASH_ARGS(BENCHMARK(BM_UniqueString100bytes));
 
 }  // namespace compute
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/compute-test.cc b/cpp/src/arrow/compute/compute-test.cc
index c73bfa309fd..84af8f7c6b0 100644
--- a/cpp/src/arrow/compute/compute-test.cc
+++ b/cpp/src/arrow/compute/compute-test.cc
@@ -869,8 +869,8 @@ TYPED_TEST(TestHashKernelPrimitive, PrimitiveResizeTable) {
     return;
   }
 
-  const int64_t kTotalValues = 10000;
-  const int64_t kRepeats = 10;
+  const int64_t kTotalValues = 1000000;
+  const int64_t kRepeats = 5;
 
   vector<T> values;
   vector<T> uniques;
diff --git a/cpp/src/arrow/compute/kernels/hash.cc b/cpp/src/arrow/compute/kernels/hash.cc
index 66c907369e3..750f1d36ac7 100644
--- a/cpp/src/arrow/compute/kernels/hash.cc
+++ b/cpp/src/arrow/compute/kernels/hash.cc
@@ -43,7 +43,7 @@
 typedef int32_t hash_slot_t;
 static constexpr hash_slot_t kHashSlotEmpty = std::numeric_limits<int32_t>::max();
 
 // The maximum load factor for the hash table before resizing.
-static constexpr double kMaxHashTableLoad = 0.7;
+static constexpr double kMaxHashTableLoad = 0.5;
 
 enum class SIMDMode : char { NOSIMD, SSE4, AVX2 };
@@ -260,7 +260,7 @@ struct HashDictionary<Type, enable_if_has_c_type<Type>> {
       COMPUTE_HASH;                                            \
       while (kHashSlotEmpty != new_hash_slots[j]) {            \
         ++j;                                                   \
-        if (ARROW_PREDICT_FALSE(j == hash_table_size_)) {      \
+        if (ARROW_PREDICT_FALSE(j == new_size)) {              \
          j = 0;                                                \
        }                                                       \
      }                                                         \