From c9bbff22898993ae62410fe33e3a06dc1b9bf94b Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Sun, 11 Jun 2023 13:01:53 +0800 Subject: [PATCH 1/9] GH-36059: [C++][Compute] Reserve space for hashtable for scalar look up functions --- cpp/src/arrow/compute/kernels/scalar_set_lookup.cc | 10 +++++++++- .../compute/kernels/scalar_set_lookup_benchmark.cc | 10 ++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc index c3d2bc5417a..3c06f3ded50 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc @@ -36,16 +36,23 @@ namespace { template struct SetLookupState : public KernelState { - explicit SetLookupState(MemoryPool* pool) : lookup_table(pool, 0) {} + explicit SetLookupState(MemoryPool* pool) : lookup_table(pool, 0), memory_pool(pool) {} Status Init(const SetLookupOptions& options) { if (options.value_set.is_array()) { const ArrayData& value_set = *options.value_set.array(); memo_index_to_value_index.reserve(value_set.length); + lookup_table = + MemoTable(memory_pool, + ::arrow::internal::HashTable::kLoadFactor * value_set.length); RETURN_NOT_OK(AddArrayValueSet(options, *options.value_set.array())); } else if (options.value_set.kind() == Datum::CHUNKED_ARRAY) { const ChunkedArray& value_set = *options.value_set.chunked_array(); memo_index_to_value_index.reserve(value_set.length()); + lookup_table = + MemoTable(memory_pool, + ::arrow::internal::HashTable::kLoadFactor * value_set.length()); + int64_t offset = 0; for (const std::shared_ptr& chunk : value_set.chunks()) { RETURN_NOT_OK(AddArrayValueSet(options, *chunk->data(), offset)); @@ -99,6 +106,7 @@ struct SetLookupState : public KernelState { using MemoTable = typename HashTraits::MemoTableType; MemoTable lookup_table; + MemoryPool* memory_pool; // When there are duplicates in value_set, the MemoTable indices must // be mapped back to indices in the value_set. std::vector memo_index_to_value_index; diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc index c49dd740848..d4b96559b95 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc @@ -106,6 +106,10 @@ static void IndexInInt64SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0)); } +static void IndexInInt32LargeSet(benchmark::State& state) { + SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0)); +} + static void IsInInt8SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0)); } @@ -122,6 +126,10 @@ static void IsInInt64SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0)); } +static void IsInInt32LargeSet(benchmark::State& state) { + SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0)); +} + BENCHMARK(IndexInStringSmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IsInStringSmallSet)->RangeMultiplier(4)->Range(2, 64); @@ -134,10 +142,12 @@ BENCHMARK(IndexInInt8SmallSet)->RangeMultiplier(4)->Range(2, 8); BENCHMARK(IndexInInt16SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IndexInInt32SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IndexInInt64SmallSet)->RangeMultiplier(4)->Range(2, 64); +BENCHMARK(IndexInInt32LargeSet)->RangeMultiplier(10)->Range(100, 10000000); BENCHMARK(IsInInt8SmallSet)->RangeMultiplier(4)->Range(2, 8); BENCHMARK(IsInInt16SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IsInInt32SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IsInInt64SmallSet)->RangeMultiplier(4)->Range(2, 64); +BENCHMARK(IsInInt32LargeSet)->RangeMultiplier(10)->Range(100, 10000000); } // namespace compute } // namespace arrow From d78a43ba90a861c3799e72bb7c34fb00426368c8 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Sun, 11 Jun 2023 13:17:00 +0800 Subject: [PATCH 2/9] update benchmark --- .../kernels/scalar_set_lookup_benchmark.cc | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc index d4b96559b95..9b11661ba96 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc @@ -56,8 +56,8 @@ static void SetLookupBenchmarkString(benchmark::State& state, template static void SetLookupBenchmarkNumeric(benchmark::State& state, const std::string& func_name, - const int64_t value_set_length) { - const int64_t array_length = 1 << 18; + const int64_t value_set_length, + const int64_t array_length) { const int64_t value_min = 0; const int64_t value_max = std::numeric_limits::max(); const double null_probability = 0.1 / value_set_length; @@ -91,43 +91,51 @@ static void IsInStringLargeSet(benchmark::State& state) { } static void IndexInInt8SmallSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0)); + SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), + 1 << 18); } static void IndexInInt16SmallSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0)); + SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), + 1 << 18); } static void IndexInInt32SmallSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0)); + SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), + 1 << 18); } static void IndexInInt64SmallSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0)); + SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), + 1 << 18); } static void IndexInInt32LargeSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0)); + SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), 10); } static void IsInInt8SmallSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0)); + SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), + 1 << 18); } static void IsInInt16SmallSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0)); + SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), + 1 << 18); } static void IsInInt32SmallSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0)); + SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), + 1 << 18); } static void IsInInt64SmallSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0)); + SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), + 1 << 18); } static void IsInInt32LargeSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0)); + SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), 10); } BENCHMARK(IndexInStringSmallSet)->RangeMultiplier(4)->Range(2, 64); From aeaf701860400fff02e828e271d513bec5471891 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Wed, 21 Jun 2023 09:27:34 +0800 Subject: [PATCH 3/9] change hash table to optional to avoid excess allocation --- .../arrow/compute/kernels/scalar_set_lookup.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc index 3c06f3ded50..57432f0d19c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc @@ -36,7 +36,7 @@ namespace { template struct SetLookupState : public KernelState { - explicit SetLookupState(MemoryPool* pool) : lookup_table(pool, 0), memory_pool(pool) {} + explicit SetLookupState(MemoryPool* pool) : lookup_table(), memory_pool(pool) {} Status Init(const SetLookupOptions& options) { if (options.value_set.is_array()) { @@ -61,8 +61,8 @@ struct SetLookupState : public KernelState { } else { return Status::Invalid("value_set should be an array or chunked array"); } - if (!options.skip_nulls && lookup_table.GetNull() >= 0) { - null_index = memo_index_to_value_index[lookup_table.GetNull()]; + if (!options.skip_nulls && lookup_table.value().GetNull() >= 0) { + null_index = memo_index_to_value_index[lookup_table.value().GetNull()]; } return Status::OK(); } @@ -82,7 +82,7 @@ struct SetLookupState : public KernelState { DCHECK_EQ(memo_index, memo_size); memo_index_to_value_index.push_back(index); }; - RETURN_NOT_OK(lookup_table.GetOrInsert( + RETURN_NOT_OK(lookup_table.value().GetOrInsert( v, std::move(on_found), std::move(on_not_found), &unused_memo_index)); ++index; return Status::OK(); @@ -96,7 +96,7 @@ struct SetLookupState : public KernelState { DCHECK_EQ(memo_index, memo_size); memo_index_to_value_index.push_back(index); }; - lookup_table.GetOrInsertNull(std::move(on_found), std::move(on_not_found)); + lookup_table.value().GetOrInsertNull(std::move(on_found), std::move(on_not_found)); ++index; return Status::OK(); }; @@ -105,7 +105,7 @@ struct SetLookupState : public KernelState { } using MemoTable = typename HashTraits::MemoTableType; - MemoTable lookup_table; + std::optional lookup_table; MemoryPool* memory_pool; // When there are duplicates in value_set, the MemoTable indices must // be mapped back to indices in the value_set. @@ -272,7 +272,7 @@ struct IndexInVisitor { VisitArraySpanInline( data, [&](T v) { - int32_t index = state.lookup_table.Get(v); + int32_t index = state.lookup_table.value().Get(v); if (index != -1) { bitmap_writer.Set(); @@ -366,7 +366,7 @@ struct IsInVisitor { VisitArraySpanInline( this->data, [&](T v) { - if (state.lookup_table.Get(v) != -1) { + if (state.lookup_table.value().Get(v) != -1) { writer.Set(); } else { writer.Clear(); From 14dc6000423ec4fd1451ca5de4d34dd51cc6d783 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Wed, 21 Jun 2023 10:11:26 +0800 Subject: [PATCH 4/9] add comment --- cpp/src/arrow/compute/kernels/scalar_set_lookup.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc index 57432f0d19c..5d4cf3f4cbd 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc @@ -36,7 +36,7 @@ namespace { template struct SetLookupState : public KernelState { - explicit SetLookupState(MemoryPool* pool) : lookup_table(), memory_pool(pool) {} + explicit SetLookupState(MemoryPool* pool) : memory_pool(pool) {} Status Init(const SetLookupOptions& options) { if (options.value_set.is_array()) { @@ -105,7 +105,7 @@ struct SetLookupState : public KernelState { } using MemoTable = typename HashTraits::MemoTableType; - std::optional lookup_table; + std::optional lookup_table; // use optional for delayed initialization MemoryPool* memory_pool; // When there are duplicates in value_set, the MemoTable indices must // be mapped back to indices in the value_set. From 22e5c420ad114c119cbbff2f2a4c4a6be1332c1e Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Wed, 21 Jun 2023 10:52:48 +0800 Subject: [PATCH 5/9] change query size to 1000 --- cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc index 9b11661ba96..ea313807c4f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc @@ -111,7 +111,8 @@ static void IndexInInt64SmallSet(benchmark::State& state) { } static void IndexInInt32LargeSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), 10); + SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), + 1000); } static void IsInInt8SmallSet(benchmark::State& state) { @@ -135,7 +136,7 @@ static void IsInInt64SmallSet(benchmark::State& state) { } static void IsInInt32LargeSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), 10); + SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), 1000); } BENCHMARK(IndexInStringSmallSet)->RangeMultiplier(4)->Range(2, 64); From c19bbe62a0f5b28623a5df70296a69ecd065e59b Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Wed, 21 Jun 2023 11:00:01 +0800 Subject: [PATCH 6/9] use deref operator --- cpp/src/arrow/compute/kernels/scalar_set_lookup.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc index 5d4cf3f4cbd..0fbc5b62fe1 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup.cc @@ -61,8 +61,8 @@ struct SetLookupState : public KernelState { } else { return Status::Invalid("value_set should be an array or chunked array"); } - if (!options.skip_nulls && lookup_table.value().GetNull() >= 0) { - null_index = memo_index_to_value_index[lookup_table.value().GetNull()]; + if (!options.skip_nulls && lookup_table->GetNull() >= 0) { + null_index = memo_index_to_value_index[lookup_table->GetNull()]; } return Status::OK(); } @@ -82,7 +82,7 @@ struct SetLookupState : public KernelState { DCHECK_EQ(memo_index, memo_size); memo_index_to_value_index.push_back(index); }; - RETURN_NOT_OK(lookup_table.value().GetOrInsert( + RETURN_NOT_OK(lookup_table->GetOrInsert( v, std::move(on_found), std::move(on_not_found), &unused_memo_index)); ++index; return Status::OK(); @@ -96,7 +96,7 @@ struct SetLookupState : public KernelState { DCHECK_EQ(memo_index, memo_size); memo_index_to_value_index.push_back(index); }; - lookup_table.value().GetOrInsertNull(std::move(on_found), std::move(on_not_found)); + lookup_table->GetOrInsertNull(std::move(on_found), std::move(on_not_found)); ++index; return Status::OK(); }; @@ -272,7 +272,7 @@ struct IndexInVisitor { VisitArraySpanInline( data, [&](T v) { - int32_t index = state.lookup_table.value().Get(v); + int32_t index = state.lookup_table->Get(v); if (index != -1) { bitmap_writer.Set(); @@ -366,7 +366,7 @@ struct IsInVisitor { VisitArraySpanInline( this->data, [&](T v) { - if (state.lookup_table.value().Get(v) != -1) { + if (state.lookup_table->Get(v) != -1) { writer.Set(); } else { writer.Clear(); From 473ed475bdb9e084e7b4df6a96494dcf89943ba0 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Wed, 21 Jun 2023 11:18:00 +0800 Subject: [PATCH 7/9] improve benchmark --- .../kernels/scalar_set_lookup_benchmark.cc | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc index ea313807c4f..2a893f68fb3 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc @@ -51,6 +51,7 @@ static void SetLookupBenchmarkString(benchmark::State& state, } state.SetItemsProcessed(state.iterations() * array_length); state.SetBytesProcessed(state.iterations() * values->data()->buffers[2]->size()); + state.counters["value_set_length"] = value_set_length; } template @@ -72,6 +73,7 @@ static void SetLookupBenchmarkNumeric(benchmark::State& state, } state.SetItemsProcessed(state.iterations() * array_length); state.SetBytesProcessed(state.iterations() * values->data()->buffers[1]->size()); + state.counters["value_set_length"] = value_set_length; } static void IndexInStringSmallSet(benchmark::State& state) { @@ -90,53 +92,56 @@ static void IsInStringLargeSet(benchmark::State& state) { SetLookupBenchmarkString(state, "is_in_meta_binary", 1 << 10); } +constexpr int64_t kArrayLengthWithSmallSet = 1 << 18; +constexpr int64_t kArrayLengthWithLargeSet = 1000; static void IndexInInt8SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), - 1 << 18); + kArrayLengthWithSmallSet); } static void IndexInInt16SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), - 1 << 18); + kArrayLengthWithSmallSet); } static void IndexInInt32SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), - 1 << 18); + kArrayLengthWithSmallSet); } static void IndexInInt64SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), - 1 << 18); + kArrayLengthWithSmallSet); } static void IndexInInt32LargeSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), - 1000); + kArrayLengthWithLargeSet); } static void IsInInt8SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), - 1 << 18); + kArrayLengthWithSmallSet); } static void IsInInt16SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), - 1 << 18); + kArrayLengthWithSmallSet); } static void IsInInt32SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), - 1 << 18); + kArrayLengthWithSmallSet); } static void IsInInt64SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), - 1 << 18); + kArrayLengthWithSmallSet); } static void IsInInt32LargeSet(benchmark::State& state) { - SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), 1000); + SetLookupBenchmarkNumeric(state, "is_in_meta_binary", state.range(0), + kArrayLengthWithLargeSet); } BENCHMARK(IndexInStringSmallSet)->RangeMultiplier(4)->Range(2, 64); @@ -151,12 +156,12 @@ BENCHMARK(IndexInInt8SmallSet)->RangeMultiplier(4)->Range(2, 8); BENCHMARK(IndexInInt16SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IndexInInt32SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IndexInInt64SmallSet)->RangeMultiplier(4)->Range(2, 64); -BENCHMARK(IndexInInt32LargeSet)->RangeMultiplier(10)->Range(100, 10000000); +BENCHMARK(IndexInInt32LargeSet)->RangeMultiplier(100)->Range(100, 1000000); BENCHMARK(IsInInt8SmallSet)->RangeMultiplier(4)->Range(2, 8); BENCHMARK(IsInInt16SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IsInInt32SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IsInInt64SmallSet)->RangeMultiplier(4)->Range(2, 64); -BENCHMARK(IsInInt32LargeSet)->RangeMultiplier(10)->Range(100, 10000000); +BENCHMARK(IsInInt32LargeSet)->RangeMultiplier(100)->Range(100, 1000000); } // namespace compute } // namespace arrow From 2c4e2b8fdb751ec72ede8fe70eb68b465b338099 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 22 Jun 2023 17:17:47 +0200 Subject: [PATCH 8/9] Further nits --- .../arrow/compute/kernels/scalar_set_lookup_benchmark.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc index 2a893f68fb3..be9652601c9 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc @@ -92,8 +92,9 @@ static void IsInStringLargeSet(benchmark::State& state) { SetLookupBenchmarkString(state, "is_in_meta_binary", 1 << 10); } -constexpr int64_t kArrayLengthWithSmallSet = 1 << 18; -constexpr int64_t kArrayLengthWithLargeSet = 1000; +static constexpr int64_t kArrayLengthWithSmallSet = 1 << 18; +static constexpr int64_t kArrayLengthWithLargeSet = 1000; + static void IndexInInt8SmallSet(benchmark::State& state) { SetLookupBenchmarkNumeric(state, "index_in_meta_binary", state.range(0), kArrayLengthWithSmallSet); @@ -156,12 +157,12 @@ BENCHMARK(IndexInInt8SmallSet)->RangeMultiplier(4)->Range(2, 8); BENCHMARK(IndexInInt16SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IndexInInt32SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IndexInInt64SmallSet)->RangeMultiplier(4)->Range(2, 64); -BENCHMARK(IndexInInt32LargeSet)->RangeMultiplier(100)->Range(100, 1000000); +BENCHMARK(IndexInInt32LargeSet)->RangeMultiplier(100)->Range(1000, 1000000); BENCHMARK(IsInInt8SmallSet)->RangeMultiplier(4)->Range(2, 8); BENCHMARK(IsInInt16SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IsInInt32SmallSet)->RangeMultiplier(4)->Range(2, 64); BENCHMARK(IsInInt64SmallSet)->RangeMultiplier(4)->Range(2, 64); -BENCHMARK(IsInInt32LargeSet)->RangeMultiplier(100)->Range(100, 1000000); +BENCHMARK(IsInInt32LargeSet)->RangeMultiplier(100)->Range(1000, 1000000); } // namespace compute } // namespace arrow From 89b3a76ca9b8019a6c1a5301efe82b9f3e2a2ae1 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 22 Jun 2023 17:47:34 +0200 Subject: [PATCH 9/9] Fix compilation warning --- cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc index be9652601c9..9158c518b41 100644 --- a/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_set_lookup_benchmark.cc @@ -51,7 +51,7 @@ static void SetLookupBenchmarkString(benchmark::State& state, } state.SetItemsProcessed(state.iterations() * array_length); state.SetBytesProcessed(state.iterations() * values->data()->buffers[2]->size()); - state.counters["value_set_length"] = value_set_length; + state.counters["value_set_length"] = static_cast(value_set_length); } template @@ -73,7 +73,7 @@ static void SetLookupBenchmarkNumeric(benchmark::State& state, } state.SetItemsProcessed(state.iterations() * array_length); state.SetBytesProcessed(state.iterations() * values->data()->buffers[1]->size()); - state.counters["value_set_length"] = value_set_length; + state.counters["value_set_length"] = static_cast(value_set_length); } static void IndexInStringSmallSet(benchmark::State& state) {