From b97002137adfadcc21348c77f519dce682d57c1e Mon Sep 17 00:00:00 2001 From: michalursa Date: Mon, 5 Sep 2022 23:40:33 -0700 Subject: [PATCH 1/3] Window Functions adding helper classes for ranking functions --- cpp/src/arrow/CMakeLists.txt | 5 + cpp/src/arrow/compute/exec/util.h | 27 +- .../window_functions/bit_vector_navigator.cc | 122 +++ .../window_functions/bit_vector_navigator.h | 158 ++++ .../exec/window_functions/merge_tree.cc | 293 +++++++ .../exec/window_functions/merge_tree.h | 338 +++++++ .../exec/window_functions/range_tree.cc | 227 +++++ .../exec/window_functions/range_tree.h | 67 ++ .../exec/window_functions/splay_tree.cc | 562 ++++++++++++ .../exec/window_functions/splay_tree.h | 139 +++ .../exec/window_functions/window_frame.h | 143 +++ .../exec/window_functions/window_rank.cc | 821 ++++++++++++++++++ .../exec/window_functions/window_rank.h | 150 ++++ 13 files changed, 3046 insertions(+), 6 deletions(-) create mode 100644 cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc create mode 100644 cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.h create mode 100644 cpp/src/arrow/compute/exec/window_functions/merge_tree.cc create mode 100644 cpp/src/arrow/compute/exec/window_functions/merge_tree.h create mode 100644 cpp/src/arrow/compute/exec/window_functions/range_tree.cc create mode 100644 cpp/src/arrow/compute/exec/window_functions/range_tree.h create mode 100644 cpp/src/arrow/compute/exec/window_functions/splay_tree.cc create mode 100644 cpp/src/arrow/compute/exec/window_functions/splay_tree.h create mode 100644 cpp/src/arrow/compute/exec/window_functions/window_frame.h create mode 100644 cpp/src/arrow/compute/exec/window_functions/window_rank.cc create mode 100644 cpp/src/arrow/compute/exec/window_functions/window_rank.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index e1e409d0a7d..d6f9c94fc89 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -410,6 +410,11 @@ 
if(ARROW_COMPUTE)
        compute/exec/tpch_node.cc
        compute/exec/union_node.cc
        compute/exec/util.cc
+       compute/exec/window_functions/bit_vector_navigator.cc
+       compute/exec/window_functions/merge_tree.cc
+       compute/exec/window_functions/splay_tree.cc
+       compute/exec/window_functions/range_tree.cc
+       compute/exec/window_functions/window_rank.cc
        compute/function.cc
        compute/function_internal.cc
        compute/kernel.cc
diff --git a/cpp/src/arrow/compute/exec/util.h b/cpp/src/arrow/compute/exec/util.h
index 8f6b3de2e1f..b249857f441 100644
--- a/cpp/src/arrow/compute/exec/util.h
+++ b/cpp/src/arrow/compute/exec/util.h
@@ -167,6 +167,27 @@ class TempVectorHolder {
   uint32_t num_elements_;
 };
 
+// Declares a temporary array `name` with elements of `type`, holding
+// MiniBatch::kMiniBatchLength elements and backed by a TempVectorHolder called
+// `name##_buf`. A variable named `temp_vector_stack` must be in scope.
+#define TEMP_VECTOR(type, name)                                     \
+  auto name##_buf = arrow::util::TempVectorHolder<type>(            \
+      temp_vector_stack, arrow::util::MiniBatch::kMiniBatchLength); \
+  auto name = name##_buf.mutable_data();
+
+// Opens a loop over [0, num_rows) in steps of MiniBatch::kMiniBatchLength.
+// Inside the loop `batch_begin` is the first row of the current mini-batch and
+// `batch_length` is its number of rows. Must be closed with END_MINI_BATCH_FOR.
+#define BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, num_rows)              \
+  for (int64_t batch_begin = 0; batch_begin < (num_rows);                      \
+       batch_begin += arrow::util::MiniBatch::kMiniBatchLength) {              \
+    int64_t batch_length =                                                     \
+        std::min(static_cast<int64_t>(num_rows) - batch_begin,                 \
+                 static_cast<int64_t>(arrow::util::MiniBatch::kMiniBatchLength));
+
+// Closes the loop opened by BEGIN_MINI_BATCH_FOR.
+#define END_MINI_BATCH_FOR }
+
 class bit_util {
  public:
   static void bits_to_indexes(int bit_to_search, int64_t hardware_flags,
@@ -365,13 +386,14 @@ struct ARROW_EXPORT TableSinkNodeConsumer : public SinkNodeConsumer {
 /// Modify an Expression with pre-order and post-order visitation.
 /// `pre` will be invoked on each Expression. `pre` will visit Calls before their
 /// arguments, `post_call` will visit Calls (and no other Expressions) after their
-/// arguments. Visitors should return the Identical expression to indicate no change; this
-/// will prevent unnecessary construction in the common case where a modification is not
-/// possible/necessary/...
+/// arguments.
Visitors should return the Identical expression to indicate no change; +/// this will prevent unnecessary construction in the common case where a modification +/// is not possible/necessary/... /// -/// If an argument was modified, `post_call` visits a reconstructed Call with the modified -/// arguments but also receives a pointer to the unmodified Expression as a second -/// argument. If no arguments were modified the unmodified Expression* will be nullptr. +/// If an argument was modified, `post_call` visits a reconstructed Call with the +/// modified arguments but also receives a pointer to the unmodified Expression as a +/// second argument. If no arguments were modified the unmodified Expression* will be +/// nullptr. template Result ModifyExpression(Expression expr, const PreVisit& pre, const PostVisitCall& post_call) { diff --git a/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc b/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc new file mode 100644 index 00000000000..104d952d6fc --- /dev/null +++ b/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc @@ -0,0 +1,122 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#include "arrow/compute/exec/window_functions/bit_vector_navigator.h"
+#include <algorithm>
+#include <cstdint>
+#include "arrow/compute/exec/util.h"
+
+namespace arrow {
+namespace compute {
+
+// For every rank r in [rank_begin, rank_end) writes to outputs[r - rank_begin]
+// the same value that Select(r, ...) would return: -1 for negative ranks,
+// num_bits for ranks at or above the number of set bits, and the position of
+// the set bit with that rank otherwise.
+//
+// The in-range part is produced by scanning the bit vector between the first
+// and the last selected bit in mini-batches instead of calling Select per rank.
+//
+void BitVectorNavigator::SelectsForRangeOfRanks(
+    int64_t rank_begin, int64_t rank_end, int64_t num_bits, const uint64_t* bitvec,
+    const uint64_t* popcounts, int64_t* outputs, int64_t hardware_flags,
+    util::TempVectorStack* temp_vector_stack) {
+  ARROW_DCHECK(rank_begin <= rank_end);
+  if (rank_begin == rank_end) {
+    return;
+  }
+  int64_t popcount_all = PopCount(num_bits, bitvec, popcounts);
+  // Entire range of ranks is below zero.
+  if (rank_end <= 0LL) {
+    for (int64_t i = 0LL; i < rank_end - rank_begin; ++i) {
+      outputs[i] = -1LL;
+    }
+    return;
+  }
+  // Entire range of ranks is above the maximum rank present.
+  if (rank_begin >= popcount_all) {
+    for (int64_t i = 0LL; i < rank_end - rank_begin; ++i) {
+      outputs[i] = num_bits;
+    }
+    return;
+  }
+  // Clamp the negative prefix; outputs is advanced so that outputs[0]
+  // corresponds to rank 0 from here on.
+  if (rank_begin < 0LL) {
+    for (int64_t i = 0LL; i < -rank_begin; ++i) {
+      outputs[i] = -1LL;
+    }
+    outputs += -rank_begin;
+    rank_begin = 0LL;
+  }
+  // Clamp the suffix that is above the maximum rank.
+  if (rank_end > popcount_all) {
+    for (int64_t i = popcount_all - rank_begin; i < rank_end - rank_begin; ++i) {
+      outputs[i] = num_bits;
+    }
+    rank_end = popcount_all;
+  }
+
+  int64_t minibatch_length_max = util::MiniBatch::kMiniBatchLength;
+  auto indexes = util::TempVectorHolder<uint16_t>(
+      temp_vector_stack, static_cast<uint32_t>(minibatch_length_max));
+  int num_indexes;
+
+  // Positions of the first and the last (inclusive) set bit to output.
+  // The last rank in the half-open range is rank_end - 1.
+  int64_t first_select =
+      BitVectorNavigator::Select(rank_begin, num_bits, bitvec, popcounts);
+  int64_t last_select =
+      BitVectorNavigator::Select(rank_end - 1, num_bits, bitvec, popcounts);
+
+  for (int64_t minibatch_begin = first_select; minibatch_begin < last_select + 1;
+       minibatch_begin += minibatch_length_max) {
+    int64_t minibatch_end =
+        std::min(last_select + 1, minibatch_begin + minibatch_length_max);
+    util::bit_util::bits_to_indexes(
+        /*bit_to_search=*/1, hardware_flags,
+        static_cast<int>(minibatch_end - minibatch_begin),
+        reinterpret_cast<const uint8_t*>(bitvec), &num_indexes, indexes.mutable_data(),
+        static_cast<int>(minibatch_begin));
+    for (int i = 0; i < num_indexes; ++i) {
+      outputs[i] = minibatch_begin + indexes.mutable_data()[i];
+    }
+    outputs += num_indexes;
+  }
+}
+
+// For every row in [batch_begin, batch_end) takes the rank of that row
+// (RankNext(row) - 1), adds rank_delta to it and writes to
+// outputs[row - batch_begin] the select result for the shifted rank.
+//
+void BitVectorNavigator::SelectsForRelativeRanksForRangeOfRows(
+    int64_t batch_begin, int64_t batch_end, int64_t rank_delta, int64_t num_rows,
+    const uint64_t* ties_bitvec, const uint64_t* ties_popcounts, int64_t* outputs,
+    int64_t hardware_flags, util::TempVectorStack* temp_vector_stack) {
+  // Break into mini-batches
+  int64_t minibatch_length_max = util::MiniBatch::kMiniBatchLength;
+  auto selects_for_ranks_buf = util::TempVectorHolder<int64_t>(
+      temp_vector_stack, static_cast<uint32_t>(minibatch_length_max));
+  auto selects_for_ranks = selects_for_ranks_buf.mutable_data();
+  for (int64_t minibatch_begin = batch_begin; minibatch_begin < batch_end;
+       minibatch_begin += minibatch_length_max) {
+    int64_t minibatch_end = std::min(batch_end, minibatch_begin + minibatch_length_max);
+
+    // First and last rank that we are interested in
+    int64_t first_rank =
+        BitVectorNavigator::RankNext(minibatch_begin, ties_bitvec, ties_popcounts) - 1LL;
+    int64_t last_rank =
+        BitVectorNavigator::RankNext(minibatch_end - 1, ties_bitvec, ties_popcounts) -
+        1LL;
+
+    // Do select for each rank in the calculated range.
+    //
+    BitVectorNavigator::SelectsForRangeOfRanks(
+        first_rank + rank_delta, last_rank + rank_delta + 1, num_rows, ties_bitvec,
+        ties_popcounts, selects_for_ranks, hardware_flags, temp_vector_stack);
+
+    // Walk the rows of the mini-batch; the rank (relative to first_rank)
+    // advances every time a set bit is encountered. Each row writes to its own
+    // output slot.
+    int irank = 0;
+    outputs[minibatch_begin - batch_begin] = selects_for_ranks[irank];
+    for (int64_t i = minibatch_begin + 1; i < minibatch_end; ++i) {
+      irank += bit_util::GetBit(reinterpret_cast<const uint8_t*>(ties_bitvec), i) ? 1 : 0;
+      outputs[i - batch_begin] = selects_for_ranks[irank];
+    }
+  }
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.h b/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.h
new file mode 100644
index 00000000000..ba62e21ea12
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.h
@@ -0,0 +1,158 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include "arrow/compute/exec/util.h"
+#include "arrow/util/bit_util.h"
+
+namespace arrow {
+namespace compute {
+
+// Bit-vector allocated size must be multiple of 64-bits.
+// There are exactly ceil(num_bits / 64) 64-bit population counters.
+//
+class BitVectorNavigator {
+ public:
+  // Fills pop_counts[i] with the number of set bits in all 64-bit words
+  // preceding word i and returns the total number of set bits.
+  // Whole words are counted, including the last one.
+  static uint64_t GenPopCounts(int64_t num_bits, const uint64_t* bits,
+                               uint64_t* pop_counts) {
+    int64_t num_pop_counts = (num_bits + 63) / 64;
+    uint64_t sum = 0;
+    for (int64_t i = 0; i < num_pop_counts; ++i) {
+      pop_counts[i] = sum;
+      sum += ARROW_POPCOUNT64(bits[i]);
+    }
+    return sum;
+  }
+
+  // O(1)
+  // Total number of set bits. Returns 0 for an empty bit vector without
+  // touching the (then empty) input arrays.
+  static inline int64_t PopCount(int64_t num_bits, const uint64_t* bitvec,
+                                 const uint64_t* popcounts) {
+    if (num_bits <= 0) {
+      return 0;
+    }
+    int64_t last_word = (num_bits - 1) / 64;
+    return popcounts[last_word] + ARROW_POPCOUNT64(bitvec[last_word]);
+  }
+
+  // O(log(N))
+  // The output is set to -1 if rank is below zero and to num_bits if
+  // rank is above the maximum rank of any row in the represented range.
+  static inline int64_t Select(int64_t rank, int64_t num_bits, const uint64_t* bits,
+                               const uint64_t* pop_counts) {
+    if (rank < 0) {
+      return -1LL;
+    }
+    int64_t max_rank = PopCount(num_bits, bits, pop_counts) - 1LL;
+    if (rank > max_rank) {
+      return num_bits;
+    }
+
+    int64_t num_pop_counts = (num_bits + 63) / 64;
+    // Find index of 64-bit block that contains the nth set bit.
+    int64_t block_id = (std::upper_bound(pop_counts, pop_counts + num_pop_counts,
+                                         static_cast<uint64_t>(rank)) -
+                        pop_counts) -
+                       1;
+    // Binary search position of (n - pop_count + 1)th bit set in the 64-bit
+    // block.
+    uint64_t block = bits[block_id];
+    int64_t bit_rank = rank - pop_counts[block_id];
+    int bit_id = 0;
+    for (int half_bits = 32; half_bits >= 1; half_bits /= 2) {
+      uint64_t mask = ((1ULL << half_bits) - 1ULL);
+      int64_t lower_half_pop_count = ARROW_POPCOUNT64(block & mask);
+      if (bit_rank >= lower_half_pop_count) {
+        block >>= half_bits;
+        bit_rank -= lower_half_pop_count;
+        bit_id += half_bits;
+      }
+    }
+    return block_id * 64 + bit_id;
+  }
+
+  // TODO: We could implement BitVectorNavigator::Select that works on batches
+  // instead of single rows. Then it could use precomputed static B-tree to
+  // speed up binary search.
+  //
+
+  // O(1)
+  // Number of set bits strictly before pos.
+  // Input row number must be valid (between 0 and number of rows less 1).
+  static inline int64_t Rank(int64_t pos, const uint64_t* bits,
+                             const uint64_t* pop_counts) {
+    int64_t block = pos >> 6;
+    int offset = static_cast<int>(pos & 63LL);
+    uint64_t mask = (1ULL << offset) - 1ULL;
+    int64_t rank1 =
+        static_cast<int64_t>(pop_counts[block]) + ARROW_POPCOUNT64(bits[block] & mask);
+    return rank1;
+  }
+
+  // O(1)
+  // Rank of the next row (also valid for the last row when the next row would
+  // be outside of the range of rows). Equal to the number of set bits at
+  // positions up to and including pos.
+  static inline int64_t RankNext(int64_t pos, const uint64_t* bits,
+                                 const uint64_t* pop_counts) {
+    int64_t block = pos >> 6;
+    int offset = static_cast<int>(pos & 63LL);
+    uint64_t mask = ~0ULL >> (63 - offset);
+    int64_t rank1 =
+        static_cast<int64_t>(pop_counts[block]) + ARROW_POPCOUNT64(bits[block] & mask);
+    return rank1;
+  }
+
+  // Input ranks may be outside of range of ranks present in the input bit
+  // vector.
+  //
+  static void SelectsForRangeOfRanks(int64_t rank_begin, int64_t rank_end,
+                                     int64_t num_bits, const uint64_t* bitvec,
+                                     const uint64_t* popcounts, int64_t* outputs,
+                                     int64_t hardware_flags,
+                                     util::TempVectorStack* temp_vector_stack);
+
+  static void SelectsForRelativeRanksForRangeOfRows(
+      int64_t batch_begin, int64_t batch_end, int64_t rank_delta, int64_t num_rows,
+      const uint64_t* ties_bitvec, const uint64_t* ties_popcounts, int64_t* outputs,
+      int64_t hardware_flags, util::TempVectorStack* temp_vector_stack);
+
+  // Writes to ids, in increasing order, the positions of all set bits among
+  // the first num_rows bits of bitvec.
+  template <typename INDEX_T>
+  static void GenSelectedIds(int64_t num_rows, const uint64_t* bitvec, INDEX_T* ids,
+                             int64_t hardware_flags,
+                             util::TempVectorStack* temp_vector_stack) {
+    // Break into mini-batches.
+    //
+    int64_t batch_length_max = util::MiniBatch::kMiniBatchLength;
+    auto batch_ids_buf = util::TempVectorHolder<uint16_t>(
+        temp_vector_stack, static_cast<uint32_t>(batch_length_max));
+    auto batch_ids = batch_ids_buf.mutable_data();
+    int batch_num_ids;
+    int64_t num_ids = 0;
+    for (int64_t batch_begin = 0; batch_begin < num_rows;
+         batch_begin += batch_length_max) {
+      int64_t batch_length = std::min(num_rows - batch_begin, batch_length_max);
+      // batch_begin is a multiple of the mini-batch length, so the word
+      // offset below relies on that length being a multiple of 64.
+      util::bit_util::bits_to_indexes(
+          /*bit_to_search=*/1, hardware_flags, static_cast<int>(batch_length),
+          reinterpret_cast<const uint8_t*>(bitvec + (batch_begin / 64)), &batch_num_ids,
+          batch_ids);
+      for (int i = 0; i < batch_num_ids; ++i) {
+        ids[num_ids + i] = static_cast<INDEX_T>(batch_begin + batch_ids[i]);
+      }
+      num_ids += batch_num_ids;
+    }
+  }
+};
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/window_functions/merge_tree.cc b/cpp/src/arrow/compute/exec/window_functions/merge_tree.cc
new file mode 100644
index 00000000000..835a64053a0
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/window_functions/merge_tree.cc
@@ -0,0 +1,293 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/compute/exec/window_functions/merge_tree.h"
+
+#include <algorithm>
+#include <cstring>
+#include <vector>
+
+namespace arrow {
+namespace compute {
+
+// Builds the per-level bit vectors and population counters for num_rows rows.
+// `permutation` is the row order used for the top generated level; each lower
+// level is obtained by splitting the level above it.
+//
+void MergeTree::Build(int64_t num_rows, const int64_t* permutation,
+                      int num_levels_to_skip, int64_t hardware_flags,
+                      util::TempVectorStack* temp_vector_stack) {
+  num_rows_ = num_rows;
+  if (num_rows == 0) {
+    return;
+  }
+
+  int height = 1 + arrow::bit_util::Log2(num_rows);
+  level_bitvecs_.resize(height);
+  level_popcounts_.resize(height);
+
+  int64_t num_bit_words = arrow::bit_util::CeilDiv(num_rows, 64);
+
+  // We skip level 0 on purpose - it is not used.
+  // We also skip num_levels_to_skip from the top.
+  //
+  for (int level = 1; level < height - num_levels_to_skip; ++level) {
+    level_bitvecs_[level].resize(num_bit_words);
+    level_popcounts_[level].resize(num_bit_words);
+  }
+
+  // Two scratch buffers used alternately: the permutation of one level is read
+  // from one of them while the permutation of the level below is written to
+  // the other.
+  std::vector<int64_t> permutation_temp[2];
+  permutation_temp[0].resize(num_rows);
+  permutation_temp[1].resize(num_rows);
+  int64_t* permutation_pingpong[2];
+  permutation_pingpong[0] = permutation_temp[0].data();
+  permutation_pingpong[1] = permutation_temp[1].data();
+
+  // Generate tree layers top-down
+  //
+  int top_level = height - num_levels_to_skip - 1;
+  for (int target_level = top_level; target_level > 0; --target_level) {
+    int flip = target_level % 2;
+    // The level directly below the top reads the caller's permutation; all
+    // other levels read the buffer written by the previous iteration.
+    const int64_t* permutation_up =
+        (target_level == top_level - 1) ? permutation : permutation_pingpong[flip];
+    if (target_level < top_level) {
+      int64_t* permutation_this = permutation_pingpong[1 - flip];
+      Split(target_level + 1, permutation_up, permutation_this, hardware_flags,
+            temp_vector_stack);
+    }
+    const int64_t* permutation_this =
+        (target_level == top_level) ? permutation : permutation_pingpong[1 - flip];
+    GenBitvec(target_level, permutation_this);
+  }
+}
+
+// Advances every query by one level (from `level` to `level - 1`).
+// The positions stored in query_states are cascaded to their children; a child
+// node fully inside [begin, end) is appended to the query's output, a child
+// node partially inside is kept in the query's state for the next step.
+//
+void MergeTree::RangeQueryStep(int level, int64_t num_queries, const int64_t* begins,
+                               const int64_t* ends, RangeQueryState* query_states,
+                               RangeQueryState* query_outputs) const {
+  for (int64_t iquery = 0; iquery < num_queries; ++iquery) {
+    int64_t begin = begins[iquery];
+    int64_t end = ends[iquery];
+    RangeQueryState& state = query_states[iquery];
+    RangeQueryState& output = query_outputs[iquery];
+    ARROW_DCHECK(begin <= end && begin >= 0 && end <= num_rows_);
+
+    RangeQueryState parent_state;
+    parent_state.pos[0] = state.pos[0];
+    parent_state.pos[1] = state.pos[1];
+    state.pos[0] = state.pos[1] = output.pos[0] = output.pos[1] = RangeQueryState::kEmpty;
+
+    for (int iparent_pos = 0; iparent_pos < 2; ++iparent_pos) {
+      int64_t parent_pos = parent_state.pos[iparent_pos];
+      if (parent_pos != RangeQueryState::kEmpty) {
+        RangeQueryState child_state;
+        Cascade(level, parent_pos, &child_state);
+        for (int ichild_pos = 0; ichild_pos < 2; ++ichild_pos) {
+          int64_t child_pos = child_state.pos[ichild_pos];
+          if (child_pos != RangeQueryState::kEmpty) {
+            int64_t child_node;
+            int64_t child_length;
+            RangeQueryState::NodeAndLengthFromPos(level - 1, child_pos, &child_node,
+                                                  &child_length);
+            if (NodeFullyInsideRange(level - 1, child_node, begin, end)) {
+              output.AppendPos(child_pos);
+            } else if (NodePartiallyInsideRange(level - 1, child_node, begin, end)) {
+              state.AppendPos(child_pos);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+// Batched version of NthElement. When opt_ids is not null it lists the indices
+// of the queries to process (inputs and outputs are indexed by those ids),
+// otherwise queries 0..num_queries-1 are processed.
+//
+void MergeTree::NthElement(int64_t num_queries, const uint16_t* opt_ids,
+                           const int64_t* begins, const int64_t* ends,
+                           /* ns[i] must be in the range [0; ends[i] - begins[i]) */
+                           const int64_t* ns, int64_t* row_numbers,
+                           util::TempVectorStack* temp_vector_stack) const {
+  int64_t batch_length_max = util::MiniBatch::kMiniBatchLength;
+
+  // Allocate temporary buffers
+  //
+  auto temp_begins_buf = util::TempVectorHolder<int64_t>(
+      temp_vector_stack, static_cast<uint32_t>(batch_length_max));
+  int64_t* temp_begins = temp_begins_buf.mutable_data();
+
+  auto temp_ends_buf = util::TempVectorHolder<int64_t>(
+      temp_vector_stack, static_cast<uint32_t>(batch_length_max));
+  int64_t* temp_ends = temp_ends_buf.mutable_data();
+
+  auto temp_ns_buf = util::TempVectorHolder<int64_t>(
+      temp_vector_stack, static_cast<uint32_t>(batch_length_max));
+  int64_t* temp_ns = temp_ns_buf.mutable_data();
+
+  for (int64_t batch_begin = 0; batch_begin < num_queries;
+       batch_begin += batch_length_max) {
+    int64_t batch_length = std::min(num_queries - batch_begin, batch_length_max);
+
+    // Initialize tree cursors (begin and end of a range of some top level
+    // node for each query/frame).
+    //
+    if (opt_ids) {
+      for (int64_t i = 0; i < batch_length; ++i) {
+        uint16_t id = opt_ids[batch_begin + i];
+        temp_begins[i] = begins[id];
+        temp_ends[i] = ends[id];
+        temp_ns[i] = ns[id];
+        ARROW_DCHECK(temp_ns[i] >= 0 && temp_ns[i] < temp_ends[i] - temp_begins[i]);
+      }
+    } else {
+      memcpy(temp_begins, begins + batch_begin, batch_length * sizeof(temp_begins[0]));
+      memcpy(temp_ends, ends + batch_begin, batch_length * sizeof(temp_ends[0]));
+      memcpy(temp_ns, ns + batch_begin, batch_length * sizeof(temp_ns[0]));
+    }
+
+    // Traverse the tree top-down
+    //
+    int top_level = static_cast<int>(level_bitvecs_.size()) - 1;
+    for (int level = top_level; level > 0; --level) {
+      for (int64_t i = 0; i < batch_length; ++i) {
+        NthElementStep(level, temp_begins + i, temp_ends + i, temp_ns + i);
+      }
+    }
+
+    // Output results
+    //
+    if (opt_ids) {
+      for (int64_t i = 0; i < batch_length; ++i) {
+        uint16_t id = opt_ids[batch_begin + i];
+        row_numbers[id] = temp_begins[i];
+      }
+    } else {
+      for (int64_t i = 0; i < batch_length; ++i) {
+        row_numbers[batch_begin + i] = temp_begins[i];
+      }
+    }
+  }
+}
+
+// Bit i of the generated level is bit (level - 1) of permutation[i].
+// Population counters for the level are regenerated afterwards.
+//
+void MergeTree::GenBitvec(
+    /* level to generate for */ int level, const int64_t* permutation) {
+  uint64_t result = 0ULL;
+  for (int64_t base = 0; base < num_rows_; base += 64) {
+    for (int64_t i = base; i < std::min(base + 64, num_rows_); ++i) {
+      int64_t bit = (permutation[i] >> (level - 1)) & 1;
+      result |= static_cast<uint64_t>(bit) << (i & 63);
+    }
+    level_bitvecs_[level][base / 64] = result;
+    result = 0ULL;
+  }
+
+  BitVectorNavigator::GenPopCounts(num_rows_, level_bitvecs_[level].data(),
+                                   level_popcounts_[level].data());
+}
+
+// Maps a (node, prefix length) position at `level` to the corresponding
+// positions in the two child nodes at `level - 1`. result->pos[0] is for the
+// left child and result->pos[1] for the right child; kEmpty is stored when the
+// prefix contributes no elements to that child.
+//
+void MergeTree::Cascade(int level, int64_t pos, RangeQueryState* result) const {
+  ARROW_DCHECK(level > 0);
+
+  int64_t node;
+  int64_t length;
+  RangeQueryState::NodeAndLengthFromPos(level, pos, &node, &length);
+
+  int64_t node_begin = node << level;
+  // We use RankNext for node_begin + length - 1 instead of Rank for node_begin
+  // + length, because the latter one may be equal to num_rows_ which is an
+  // index out of range for bitvector.
+  //
+  int64_t rank =
+      BitVectorNavigator::RankNext(node_begin + length - 1, level_bitvecs_[level].data(),
+                                   level_popcounts_[level].data());
+  int64_t local_rank = rank - (node_begin / 2);
+  result->pos[0] =
+      RangeQueryState::PosFromNodeAndLength(level - 1, node * 2, length - local_rank);
+  bool has_right_child = (node_begin + (1LL << (level - 1))) < num_rows_;
+  result->pos[1] = has_right_child ? RangeQueryState::PosFromNodeAndLength(
+                                         level - 1, node * 2 + 1, local_rank)
+                                   : RangeQueryState::kEmpty;
+}
+
+// Cascades an inclusive range begin from `level` to the left (*lbegin) and
+// right (*rbegin) child at `level - 1`. Outputs kEmpty for a child when the
+// cascaded begin falls outside of that child (or outside of the rows).
+//
+void MergeTree::Cascade_Begin(int level, int64_t begin, int64_t* lbegin,
+                              int64_t* rbegin) const {
+  if (begin == num_rows_ || begin == RangeQueryState::kEmpty) {
+    *lbegin = *rbegin = RangeQueryState::kEmpty;
+    // Nothing to cascade; `begin` is not a valid bit vector index here.
+    return;
+  }
+  ARROW_DCHECK(begin >= 0 && begin < num_rows_);
+  int64_t node_begin_mask = ~((static_cast<int64_t>(1) << level) - 1);
+  int64_t half_node_begin = (begin & node_begin_mask) / 2;
+  int64_t half_node_length = (1LL << (level - 1));
+  int64_t rank = BitVectorNavigator::Rank(begin, level_bitvecs_[level].data(),
+                                          level_popcounts_[level].data());
+  *lbegin = (begin - rank) + half_node_begin;
+  *rbegin = rank + half_node_begin + half_node_length;
+
+  int64_t lnode_end = half_node_begin * 2 + half_node_length;
+  int64_t rnode_end = lnode_end + half_node_length;
+  if (*lbegin >= lnode_end) {
+    *lbegin = RangeQueryState::kEmpty;
+  }
+  if (*rbegin >= rnode_end || *rbegin >= num_rows_) {
+    *rbegin = RangeQueryState::kEmpty;
+  }
+}
+
+// Cascades an exclusive range end from `level` to the left (*lend) and right
+// (*rend) child at `level - 1`. Outputs kEmpty for a child when the cascaded
+// end equals the beginning of that child (no elements selected from it).
+//
+void MergeTree::Cascade_End(int level, int64_t end, int64_t* lend, int64_t* rend) const {
+  if (end == 0 || end == RangeQueryState::kEmpty) {
+    *lend = *rend = RangeQueryState::kEmpty;
+    return;
+  }
+  ARROW_DCHECK(end > 0 && end <= num_rows_);
+  int64_t node_begin_mask = ~((static_cast<int64_t>(1) << level) - 1);
+  int64_t half_node_begin = ((end - 1) & node_begin_mask) / 2;
+  int64_t half_node_length = (1LL << (level - 1));
+  int64_t rank = BitVectorNavigator::RankNext(end - 1, level_bitvecs_[level].data(),
+                                              level_popcounts_[level].data());
+  *lend = (end - rank) + half_node_begin;
+  *rend = rank + half_node_begin + half_node_length;
+
+  int64_t lnode_begin = half_node_begin * 2;
+  int64_t rnode_begin = lnode_begin + half_node_length;
+  if (*lend == lnode_begin) {
+    *lend = RangeQueryState::kEmpty;
+  }
+  if (*rend == rnode_begin) {
+    *rend = RangeQueryState::kEmpty;
+  }
+}
+
+// Returns the position at `level - 1` of the element stored at `pos` at
+// `level`; the bit at `pos` decides whether it goes to the right (1) or the
+// left (0) child.
+//
+int64_t MergeTree::Cascade_Pos(int level, int64_t pos) const {
+  ARROW_DCHECK(pos >= 0 && pos < num_rows_);
+  int64_t node_begin_mask = ~((static_cast<int64_t>(1) << level) - 1);
+  int64_t half_node_begin = (pos & node_begin_mask) / 2;
+  int64_t half_node_length = (1LL << (level - 1));
+  int64_t rank = BitVectorNavigator::Rank(pos, level_bitvecs_[level].data(),
+                                          level_popcounts_[level].data());
+  bool rchild = bit_util::GetBit(
+      reinterpret_cast<const uint8_t*>(level_bitvecs_[level].data()), pos);
+  return rchild ? (rank + half_node_begin + half_node_length)
+                : (pos - rank + half_node_begin);
+}
+
+// True when all rows of the node (clamped to num_rows_) are in [begin, end).
+//
+bool MergeTree::NodeFullyInsideRange(int level, int64_t node, int64_t begin,
+                                     int64_t end) const {
+  int64_t node_begin = node << level;
+  int64_t node_end =
+      std::min(num_rows_, node_begin + (static_cast<int64_t>(1LL) << level));
+  return node_begin >= begin && node_end <= end;
+}
+
+// True when at least one row of the node (clamped to num_rows_) is in
+// [begin, end).
+//
+bool MergeTree::NodePartiallyInsideRange(int level, int64_t node, int64_t begin,
+                                         int64_t end) const {
+  int64_t node_begin = node << level;
+  int64_t node_end =
+      std::min(num_rows_, node_begin + (static_cast<int64_t>(1LL) << level));
+  return node_begin < end && node_end > begin;
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/exec/window_functions/merge_tree.h b/cpp/src/arrow/compute/exec/window_functions/merge_tree.h
new file mode 100644
index 00000000000..cb5ea0761a3
--- /dev/null
+++ b/cpp/src/arrow/compute/exec/window_functions/merge_tree.h
@@ -0,0 +1,338 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+#include <vector>
+#include "arrow/compute/exec/util.h"
+#include "arrow/util/bit_util.h"
+#include "bit_vector_navigator.h"
+
+namespace arrow {
+namespace compute {
+
+// TODO: Support multiple [begin, end) ranges in range and nth_element queries.
+//
+
+// One way to think about MergeTree is that, when we traverse top down, we
+// switch to sortedness on X axis, and when we traverse bottom up, we switch to
+// sortedness on Y axis. At the lowest level of MergeTree rows are sorted on X
+// and the highest level they are sorted on Y.
+//
+class MergeTree {
+ public:
+  MergeTree() : num_rows_(0) {}
+
+  void Build(int64_t num_rows, const int64_t* permutation, int num_levels_to_skip,
+             int64_t hardware_flags, util::TempVectorStack* temp_vector_stack);
+
+  // Number of tree levels for the current number of rows (0 when empty).
+  int get_height() const { return num_rows_ ? 1 + arrow::bit_util::Log2(num_rows_) : 0; }
+
+  // Moves the elements of `in`, ordered as at the upper `level`, to `out`,
+  // ordered as at `level - 1`. Elements whose bit at `level` is 0 go to the
+  // left child node and elements whose bit is 1 go to the right child node,
+  // keeping their relative order.
+  //
+  template <typename S>
+  void Split(
+      /* upper level */ int level, const S* in, S* out, int64_t hardware_flags,
+      util::TempVectorStack* temp_vector_stack) const {
+    int64_t lower_node_length = 1LL << (level - 1);
+    int64_t lower_node_mask = lower_node_length - 1LL;
+
+    int64_t batch_length_max = util::MiniBatch::kMiniBatchLength;
+    int num_ids;
+    auto ids_buf = util::TempVectorHolder<uint16_t>(
+        temp_vector_stack, static_cast<uint32_t>(batch_length_max));
+    uint16_t* ids = ids_buf.mutable_data();
+
+    // Break into mini-batches
+    int64_t rank_batch_begin[2];
+    rank_batch_begin[0] = 0;
+    rank_batch_begin[1] = 0;
+    for (int64_t batch_begin = 0; batch_begin < num_rows_;
+         batch_begin += batch_length_max) {
+      int64_t batch_length = std::min(num_rows_ - batch_begin, batch_length_max);
+
+      for (int child = 0; child <= 1; ++child) {
+        // Get parent node positions (relative to the batch) for all elements
+        // coming from the current child (0 - left, 1 - right)
+        util::bit_util::bits_to_indexes(
+            child, hardware_flags, static_cast<int>(batch_length),
+            reinterpret_cast<const uint8_t*>(level_bitvecs_[level].data() +
+                                             batch_begin / 64),
+            &num_ids, ids);
+
+        for (int i = 0; i < num_ids; ++i) {
+          int64_t upper_pos = batch_begin + ids[i];
+          int64_t rank = rank_batch_begin[child] + i;
+          int64_t lower_pos = (rank & ~lower_node_mask) * 2 + child * lower_node_length +
+                              (rank & lower_node_mask);
+          out[lower_pos] = in[upper_pos];
+        }
+        rank_batch_begin[child] += num_ids;
+      }
+    }
+  }
+
+  // State or output for range query.
+  //
+  // Represents between zero and two different nodes from a single level of the
+  // tree.
+  //
+  // For each node remembers the length of its prefix, which represents a
+  // subrange of selected elements of that node.
+  //
+  // Length is between 1 and the number of node elements at this level (both
+  // bounds inclusive), because empty set of selected elements is represented by
+  // a special constant kEmpty.
+  //
+  struct RangeQueryState {
+    static constexpr int64_t kEmpty = ~static_cast<int64_t>(0);
+
+    // Encodes (node, prefix length) as the position of the last element of the
+    // prefix; a zero length is encoded as kEmpty.
+    static int64_t PosFromNodeAndLength(int level, int64_t node, int64_t length) {
+      if (length == 0) {
+        return kEmpty;
+      }
+      return (node << level) + length - 1;
+    }
+
+    // Inverse of PosFromNodeAndLength for a non-empty position.
+    static void NodeAndLengthFromPos(int level, int64_t pos, int64_t* node,
+                                     int64_t* length) {
+      ARROW_DCHECK(pos != kEmpty);
+      *node = pos >> level;
+      *length = 1 + pos - (*node << level);
+    }
+
+    void AppendPos(int64_t new_pos) {
+      // One of the two positions must be set to null
+      //
+      if (pos[0] == kEmpty) {
+        pos[0] = new_pos;
+      } else {
+        ARROW_DCHECK(pos[1] == kEmpty);
+        pos[1] = new_pos;
+      }
+    }
+
+    int64_t pos[2];
+  };
+
+  // Visiting each level updates state cursor pair and outputs state cursor
+  // pair.
+  //
+  void RangeQueryStep(int level, int64_t num_queries, const int64_t* begins,
+                      const int64_t* ends, RangeQueryState* query_states,
+                      RangeQueryState* query_outputs) const;
+
+  // Single query version of NthElement; n must be in [0, end - begin).
+  int64_t NthElement(int64_t begin, int64_t end, int64_t n) const {
+    ARROW_DCHECK(n >= 0 && n < end - begin);
+    int64_t temp_begin = begin;
+    int64_t temp_end = end;
+    int64_t temp_n = n;
+
+    // Traverse the tree top-down
+    //
+    int top_level = static_cast<int>(level_bitvecs_.size()) - 1;
+    for (int level = top_level; level > 0; --level) {
+      NthElementStep(level, &temp_begin, &temp_end, &temp_n);
+    }
+
+    return temp_begin;
+  }
+
+  void NthElement(int64_t num_queries, const uint16_t* opt_ids, const int64_t* begins,
+                  const int64_t* ends,
+                  /* ns[i] must be in the range [0; ends[i] - begins[i]) */
+                  const int64_t* ns, int64_t* row_numbers,
+                  util::TempVectorStack* temp_vector_stack) const;
+
+  const uint64_t* GetLevelBitvec(int level) const { return level_bitvecs_[level].data(); }
+
+  void Cascade_Begin(int level, int64_t begin, int64_t* lbegin, int64_t* rbegin) const;
+  void Cascade_End(int level, int64_t end, int64_t* lend, int64_t* rend) const;
+  int64_t Cascade_Pos(int level, int64_t pos) const;
+
+  static constexpr int64_t kEmptyRangeBoundary = static_cast<int64_t>(~0ULL);
+
+  // Beginning of the node at `level` that contains the element preceding
+  // the exclusive boundary `end`.
+  int64_t GetNodeBeginFromEnd(int level, int64_t end) const {
+    return ((end - 1) >> level) << level;
+  }
+  // Exclusive end of the node at `level` starting at node_begin, clamped to
+  // the number of rows.
+  int64_t GetNodeEnd(int level, int64_t node_begin) const {
+    return std::min(num_rows_, node_begin + (static_cast<int64_t>(1) << level));
+  }
+
+  // Range query for up to one mini-batch of queries.
+  //
+  // On input y_ends[i] is the exclusive end of the selected prefix in the top
+  // level node for query i (or kEmptyRangeBoundary). The tree is traversed
+  // top-down; every node found fully inside [x_begins[i], x_ends[i]) is
+  // reported by calling process_output_range(i, node_begin, y_end).
+  // y_ends is used as working storage and is overwritten.
+  //
+  template <typename T_PROCESS_OUTPUT_RANGE>
+  void MiniBatchRangeQuery(int64_t num_queries, const int64_t* x_begins,
+                           const int64_t* x_ends, int64_t* y_ends,
+                           util::TempVectorStack* temp_vector_stack,
+                           T_PROCESS_OUTPUT_RANGE process_output_range) {
+    ARROW_DCHECK(num_queries <= util::MiniBatch::kMiniBatchLength);
+
+    // Second cursor per query; together with y_ends it holds up to two
+    // partially covered nodes per level.
+    TEMP_VECTOR(int64_t, y_ends_2nd);
+
+    auto process_node = [&](int level, int64_t iquery, int64_t y_end) {
+      if (y_end != kEmptyRangeBoundary) {
+        int64_t begin = x_begins[iquery];
+        int64_t end = x_ends[iquery];
+        int64_t node_begin = GetNodeBeginFromEnd(level, y_end);
+        if (NodeFullyInsideRange(level, node_begin >> level, begin, end)) {
+          process_output_range(iquery, node_begin, y_end);
+        } else if (NodePartiallyInsideRange(level, node_begin >> level, begin, end)) {
+          if (y_ends[iquery] == kEmptyRangeBoundary) {
+            y_ends[iquery] = y_end;
+          } else {
+            ARROW_DCHECK(y_ends_2nd[iquery] == kEmptyRangeBoundary);
+            y_ends_2nd[iquery] = y_end;
+          }
+        }
+      }
+    };
+
+    for (int level = get_height() - 1; level >= 0; --level) {
+      bool is_top_level = (level == (get_height() - 1));
+      for (int64_t iquery = 0; iquery < num_queries; ++iquery) {
+        int64_t& y_end = y_ends[iquery];
+        int64_t& y_end_2nd = y_ends_2nd[iquery];
+
+        int64_t y_ends_new[4];
+        y_ends_new[0] = y_ends_new[1] = y_ends_new[2] = y_ends_new[3] =
+            kEmptyRangeBoundary;
+
+        if (is_top_level) {
+          // y_ends_2nd is not read here: it is not initialized before the
+          // top level has been processed.
+          y_ends_new[0] = y_end;
+        } else {
+          if (y_end != kEmptyRangeBoundary) {
+            Cascade_End(level + 1, y_end, &y_ends_new[0], &y_ends_new[1]);
+          }
+          // The second cursor must be cascaded from its own boundary.
+          if (y_end_2nd != kEmptyRangeBoundary) {
+            Cascade_End(level + 1, y_end_2nd, &y_ends_new[2], &y_ends_new[3]);
+          }
+        }
+
+        y_end = y_end_2nd = kEmptyRangeBoundary;
+        for (int i = 0; i < 4; ++i) {
+          process_node(level, iquery, y_ends_new[i]);
+        }
+      }
+    }
+  }
+
+  void BoxCount(int num_levels_to_skip, int num_ids, uint16_t* ids, const int64_t* begins,
+                const int64_t* ends, int64_t* lpos, int64_t* rpos,
+                int64_t* counters) const {
+    ARROW_DCHECK(num_rows_ > 0);
+    if (num_rows_ == 1) {
+      for (int i = 0; i < num_ids; ++i) {
+        uint16_t id = ids[i];
+        ARROW_DCHECK(ends[id] > begins[id] && lpos[id] != RangeQueryState::kEmpty &&
+                     lpos[id] > 0);
+        counters[id] = num_rows_;
+      }
+    }
+    for (int level = get_height() - 1 - num_levels_to_skip; level >= 0; --level) {
+      int num_ids_new = 0;
+      for (int64_t iquery = 0; iquery < num_ids; ++iquery) {
+        uint16_t id = ids[iquery];
+        int64_t begin = begins[id];
+        int64_t end = ends[id];
+        ARROW_DCHECK(end > begin);
+        int64_t lpos_new, rpos_new;
+        if (level == get_height() - 1 - num_levels_to_skip) {
+          lpos_new = lpos[id];
+          rpos_new = rpos[id];
+          ARROW_DCHECK(lpos_new != RangeQueryState::kEmpty &&
+                       rpos_new == RangeQueryState::kEmpty);
+          int64_t node_begin = (((lpos_new - 1) >> level) << level);
+          int64_t node_end =
+              std::min(num_rows_, node_begin + (static_cast<int64_t>(1) << level));
+          // The range may cover the whole node (end == node_end); that case
+          // is handled right below, so it must be allowed here.
+          ARROW_DCHECK(begin >= node_begin && end <= node_end);
+          if (begin == node_begin && end == node_end) {
+            counters[id] += lpos_new;
+            lpos_new = RangeQueryState::kEmpty;
+          }
+        } else {
+          int64_t pos_new[4];
+          pos_new[0] = pos_new[1] = pos_new[2] = pos_new[3] = RangeQueryState::kEmpty;
+          if (lpos[id] != RangeQueryState::kEmpty) {
+            Cascade_End(level + 1, lpos[id], &pos_new[0], &pos_new[1]);
+          }
+          if (rpos[id] != RangeQueryState::kEmpty) {
+            Cascade_End(level + 1, rpos[id], &pos_new[2], &pos_new[3]);
+          }
+          for (int i = 0; i < 4; ++i) {
+            if (pos_new[i] != RangeQueryState::kEmpty) {
+              int64_t node_begin = (((pos_new[i] - 1) >> level) << level);
+              int64_t node_end =
+                  std::min(num_rows_, node_begin + (static_cast<int64_t>(1) << level));
+              if (begin <= node_begin && end >= node_end) {
+                counters[id] += (pos_new[i] - node_begin);
+              } else if (end > node_begin && begin < node_end) {
+                if
(lpos_new == RangeQueryState::kEmpty) { + lpos_new = pos_new[i]; + } else { + ARROW_DCHECK(rpos_new == RangeQueryState::kEmpty); + rpos_new = pos_new[i]; + } + } + } + } + } + lpos[id] = lpos_new; + rpos[id] = rpos_new; + if (lpos_new != RangeQueryState::kEmpty) { + ids[num_ids_new++] = id; + } + } + num_ids = num_ids_new; + } + } + + private: + /* output 0 if value comes from left child and 1 otherwise */ + void GenBitvec( + /* level to generate for */ int level, + /* source permutation of rows for elements in this level */ + const int64_t* permutation); + + void Cascade(int level, int64_t pos, RangeQueryState* result) const; + + bool NodeFullyInsideRange(int level, int64_t node, int64_t begin, int64_t end) const; + + bool NodePartiallyInsideRange(int level, int64_t node, int64_t begin, + int64_t end) const; + + void NthElementStep(int level, int64_t* begin, int64_t* end, int64_t* n) const { + int64_t node_length = 1LL << level; + uint64_t node_mask = node_length - 1; + int64_t node_begin = (*begin & ~node_mask); + + int64_t rank_begin = BitVectorNavigator::Rank(*begin, level_bitvecs_[level].data(), + level_popcounts_[level].data()); + int64_t rank_end = BitVectorNavigator::RankNext( + *end - 1, level_bitvecs_[level].data(), level_popcounts_[level].data()); + int64_t length_left = (*end - *begin) - (rank_end - rank_begin); + int64_t child_mask = (length_left <= *n ? 
~0LL : 0LL); + + *begin = node_begin + ((node_length / 2 + rank_begin - node_begin / 2) & child_mask) + + (((*begin - node_begin) - (rank_begin - node_begin / 2)) & ~child_mask); + *end = *begin + ((rank_end - rank_begin) & child_mask) + (length_left & ~child_mask); + *n -= (length_left & child_mask); + } + + int64_t num_rows_; + std::vector> level_bitvecs_; + std::vector> level_popcounts_; +}; + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/range_tree.cc b/cpp/src/arrow/compute/exec/window_functions/range_tree.cc new file mode 100644 index 00000000000..1268e9378ba --- /dev/null +++ b/cpp/src/arrow/compute/exec/window_functions/range_tree.cc @@ -0,0 +1,227 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/compute/exec/window_functions/range_tree.h" + +namespace arrow { +namespace compute { + +void RangeTree::Build(int64_t num_rows, const int64_t* x_sorted_on_z, + const int64_t* y_sorted_on_z, int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack) { + num_rows_ = num_rows; + +#ifndef NDEBUG + // Check that x, y and z are permutations of [0, num_rows) + // + ARROW_DCHECK(IsPermutation(num_rows, x_sorted_on_z)); + ARROW_DCHECK(IsPermutation(num_rows, y_sorted_on_z)); +#endif + + if (num_rows <= kMinRows) { + for (int64_t i = 0; i < num_rows; ++i) { + rows_[i].x = x_sorted_on_z[i]; + rows_[i].y = y_sorted_on_z[i]; + rows_[i].z = i; + } + return; + } + + // Build x trees, trees in which nodes are split on x coordinate. + // One of them will have bit vectors organized by y coordinate (and will be + // used for remapping y values), the other one will have bit vectors + // organized by z coordinate. + // + xtree_on_z_.Build(num_rows_, x_sorted_on_z, 0, hardware_flags, temp_vector_stack); + { + std::vector x_sorted_on_y(num_rows_); + for (int64_t i = 0; i < num_rows_; ++i) { + int64_t x = x_sorted_on_z[i]; + int64_t y = y_sorted_on_z[i]; + x_sorted_on_y[y] = x; + } + xtree_on_y_.Build(num_rows_, x_sorted_on_y.data(), 0, hardware_flags, + temp_vector_stack); + } + + // Build y trees. There is one y tree for each node of the x tree. + // The y trees for the x tree nodes from the same level are concatenated to + // make a single x tree with missing top levels (e.g. 2nd level from the top + // will contain two x trees that concatenated will make up a single 2x + // larger x tree without its top most level). 
+ // + int height = xtree_on_z_.get_height(); + ytrees_on_z_.resize(height); + + std::vector local_y_sorted_on_local_z[2]; + local_y_sorted_on_local_z[0].resize(num_rows); + local_y_sorted_on_local_z[1].resize(num_rows); + memcpy(local_y_sorted_on_local_z[(height - 1) & 1].data(), y_sorted_on_z, + num_rows * sizeof(int64_t)); + + for (int level = height - 1; level > 0; --level) { + int this_level = (level & 1); + int level_above = 1 - this_level; + if (level < height - 1) { + xtree_on_z_.Split(level + 1, local_y_sorted_on_local_z[level_above].data(), + local_y_sorted_on_local_z[this_level].data(), hardware_flags, + temp_vector_stack); + for (int64_t i = 0; i < num_rows; ++i) { + int64_t& local_y = local_y_sorted_on_local_z[this_level][i]; + local_y = xtree_on_y_.Cascade_Pos(level + 1, local_y); + } + } + ytrees_on_z_[level].Build(num_rows, local_y_sorted_on_local_z[this_level].data(), + /* number of top levels to skip */ (height - 1) - level, + hardware_flags, temp_vector_stack); + } +} + +void RangeTree::BoxCount(int64_t num_queries, const int64_t* x_ends, + const int64_t* y_begins, const int64_t* y_ends, + const int64_t* z_ends, int64_t* out_counts, + int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack) { + if (num_rows_ <= kMinRows) { + for (int64_t i = 0; i < num_queries; ++i) { + out_counts[i] = 0; + for (int64_t j = 0; j < num_rows_; ++j) { + if (rows_[j].x < x_ends[i] && rows_[j].y >= y_begins[i] && + rows_[j].y < y_ends[i] && rows_[j].z < z_ends[i]) { + ++out_counts[i]; + } + } + } + return; + } + + int num_xtree_query_ids; + TEMP_VECTOR(uint16_t, xtree_query_ids); + TEMP_VECTOR(int64_t, xtree_y_begins); + TEMP_VECTOR(int64_t, xtree_y_ends); + TEMP_VECTOR(int64_t, xtree_z_ends); + + int num_ytree_query_ids; + TEMP_VECTOR(uint16_t, ytree_query_ids); + TEMP_VECTOR(int64_t, ytree_y_begins); + TEMP_VECTOR(int64_t, ytree_y_ends); + TEMP_VECTOR(int64_t, ytree_left_z_ends); + TEMP_VECTOR(int64_t, ytree_right_z_ends); + + auto add_xtree_query = 
[&](uint16_t id, int64_t y_begin, int64_t y_end, int64_t z_end) { + xtree_query_ids[num_xtree_query_ids++] = id; + xtree_y_begins[id] = y_begin; + xtree_y_ends[id] = y_end; + xtree_z_ends[id] = z_end; + }; + + auto add_ytree_query = [&](uint16_t id, int64_t y_begin, int64_t y_end, int64_t z_end) { + ytree_query_ids[num_ytree_query_ids] = id; + ytree_left_z_ends[id] = z_end; + ytree_right_z_ends[id] = MergeTree::RangeQueryState::kEmpty; + ytree_y_begins[id] = y_begin; + ytree_y_ends[id] = y_end; + ++num_ytree_query_ids; + }; + + auto try_query = [&](int level, int64_t batch_begin, uint16_t id, int64_t x_end, + int64_t y_begin, int64_t y_end, int64_t z_end) { + if (y_begin != MergeTree::RangeQueryState::kEmpty && + y_end != MergeTree::RangeQueryState::kEmpty && + z_end != MergeTree::RangeQueryState::kEmpty && z_end > 0) { + int64_t node_x_begin = (((z_end - 1) >> level) << level); + int64_t node_x_end = + std::min(num_rows_, node_x_begin + (static_cast(1) << level)); + if (x_end > node_x_begin && y_begin < y_end) { + if (level == 0) { + out_counts[batch_begin + id] += 1; + } else { + if (node_x_end <= x_end) { + add_xtree_query(id, y_begin, y_end, z_end); + } else if (node_x_begin < x_end) { + add_ytree_query(id, y_begin, y_end, z_end); + } + } + } + } + }; + + int height = xtree_on_z_.get_height(); + BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, num_rows_) + memset(out_counts + batch_begin, 0, batch_length * sizeof(int64_t)); + + for (int level = height - 1; level >= 0; --level) { + num_xtree_query_ids = 0; + num_ytree_query_ids = 0; + if (level == height - 1) { + for (int64_t i = batch_begin; i < batch_begin + batch_length; ++i) { + uint16_t id = static_cast(i - batch_begin); + int64_t x_end = x_ends[i]; + int64_t y_begin = y_begins[i]; + int64_t y_end = y_ends[i]; + int64_t z_end = z_ends[i]; + try_query(height - 1, batch_begin, id, x_end, y_begin, y_end, z_end); + } + } else { + for (int64_t i = 0; i < num_xtree_query_ids; ++i) { + uint16_t id = 
xtree_query_ids[i]; + int64_t x_end = x_ends[batch_begin + id]; + int64_t y_begin = xtree_y_begins[id]; + int64_t y_end = xtree_y_ends[id]; + int64_t z_end = xtree_z_ends[id]; + int64_t y_lbegin, y_rbegin; + int64_t y_lend, y_rend; + int64_t z_lend, z_rend; + xtree_on_y_.Cascade_Begin(level + 1, y_begin, &y_lbegin, &y_rbegin); + xtree_on_y_.Cascade_End(level + 1, y_end, &y_lend, &y_rend); + xtree_on_z_.Cascade_End(level + 1, z_end, &z_lend, &z_rend); + try_query(level, batch_begin, id, x_end, y_lbegin, y_lend, z_lend); + try_query(level, batch_begin, id, x_end, y_rbegin, y_rend, z_rend); + } + } + + if (level > 0) { + ytrees_on_z_[level].BoxCount( + height - 1 - level, num_ytree_query_ids, ytree_query_ids, ytree_y_begins, + ytree_y_ends, ytree_left_z_ends, ytree_right_z_ends, out_counts + batch_begin); + } + } + END_MINI_BATCH_FOR +} + +#ifndef NDEBUG +bool RangeTree::IsPermutation(int64_t num_rows, const int64_t* values) { + std::vector present(num_rows); + for (int64_t i = 0; i < num_rows; ++i) { + present[i] = false; + } + for (int64_t i = 0; i < num_rows; ++i) { + int64_t value = values[i]; + if (value >= 0 && value < num_rows) { + return false; + } + if (!present[value]) { + return false; + } + present[value] = true; + } + return true; +} +#endif + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/range_tree.h b/cpp/src/arrow/compute/exec/window_functions/range_tree.h new file mode 100644 index 00000000000..a7581ddc50c --- /dev/null +++ b/cpp/src/arrow/compute/exec/window_functions/range_tree.h @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include "arrow/compute/exec/util.h" +#include "arrow/compute/exec/window_functions/bit_vector_navigator.h" +#include "arrow/compute/exec/window_functions/merge_tree.h" + +namespace arrow { +namespace compute { + +// All three coordinates (x, y and z) are unique integers from the range [0, +// num_rows). +// +// We also refer to local coordinates within the context of a level of a merge +// tree. Local coordinate (x, y or z) would be a result of mapping original +// coordinate by computing its rank (position in the sequence sorted on this +// coordinate) within the node of the tree from that level, plus the index of +// the first element of that node in a vector representing the level of the +// tree. 
+// +class RangeTree { + public: + void Build(int64_t num_rows, const int64_t* x_sorted_on_z, const int64_t* y_sorted_on_z, + int64_t hardware_flags, util::TempVectorStack* temp_vector_stack); + + void BoxCount(int64_t num_queries, const int64_t* x_ends, const int64_t* y_begins, + const int64_t* y_ends, const int64_t* z_ends, int64_t* out_counts, + int64_t hardware_flags, util::TempVectorStack* temp_vector_stack); + + private: +#ifndef NDEBUG + bool IsPermutation(int64_t num_rows, const int64_t* values); +#endif + + static constexpr int64_t kMinRows = 2; + + int64_t num_rows_; + struct { + int64_t x, y, z; + } rows_[kMinRows]; + // Tree splitting on x coordinate + MergeTree xtree_on_y_; // with bitvectors indexed by y coordinate + MergeTree xtree_on_z_; // with bitvectors indexed by z coordinate + // Collection of trees splitting on y coordinate (one tree for each node of + // the xtree) + std::vector ytrees_on_z_; // with bitvectors indexed by z coordinate +}; + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/splay_tree.cc b/cpp/src/arrow/compute/exec/window_functions/splay_tree.cc new file mode 100644 index 00000000000..97fcfd8b62e --- /dev/null +++ b/cpp/src/arrow/compute/exec/window_functions/splay_tree.cc @@ -0,0 +1,562 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/exec/window_functions/splay_tree.h" + +namespace arrow { +namespace compute { + +SplayTree::SplayTree() { Clear(); } + +void SplayTree::Insert(int64_t value) { + index_t rank; + index_t parent_id; + int parent_side; + index_t node_id; + Find(value, kCountStar, &parent_id, &parent_side, &node_id, &rank); + + if (node_id != kNilId) { + while (parent_id != kNilId) { + NodeType& node = nodes_[node_id]; + ++node.value_count; + ++node.subtree_count[kCountStar]; + node_id = parent_id; + parent_id = node.parent_id; + } + +#ifndef NDEBUG + ValidateTree(); +#endif + + return; + } + + index_t new_node_id = AllocateNode(); + NodeType& new_node = nodes_[new_node_id]; + new_node.value = value; + new_node.value_count = 0; + new_node.subtree_count[0] = new_node.subtree_count[1] = 0; + new_node.parent_id = parent_id; + new_node.child_id[0] = new_node.child_id[1] = kNilId; + if (parent_id == kNilId) { + root_id_ = new_node_id; + } else { + nodes_[parent_id].child_id[parent_side] = new_node_id; + Splay(node_id); + } + nodes_[root_id_].value_count = 1; + for (int i = 0; i < 2; ++i) { + ++nodes_[root_id_].subtree_count[i]; + } + +#ifndef NDEBUG + ValidateTree(); +#endif +} + +void SplayTree::Remove(int64_t value) { + index_t rank; + index_t parent_id; + int parent_side; + index_t node_id; + Find(value, kCountStar, &parent_id, &parent_side, &node_id, &rank); + + // Noop if value is not present + if (node_id == kNilId) { + return; + } + + NodeType* node = &nodes_[node_id]; + + // Decrease subtree_count for all ancestors of the node. 
+ // + for (index_t x = parent_id; x != kNilId; x = nodes_[x].parent_id) { + nodes_[x].subtree_count[kCountStar] -= 1; + } + --node->value_count; + + if (node->value_count > 0) { +#ifndef NDEBUG + ValidateTree(); +#endif + + return; + } + + for (index_t x = parent_id; x != kNilId; x = nodes_[x].parent_id) { + nodes_[x].subtree_count[kCountDistinctValue] -= 1; + } + + if (node->child_id[0] != kNilId && node->child_id[1] != kNilId) { + index_t prev_node_id = node->child_id[0]; + while (nodes_[prev_node_id].child_id[1] != kNilId) { + prev_node_id = nodes_[prev_node_id].child_id[1]; + } + NodeType& prev_node = nodes_[prev_node_id]; + for (index_t x = nodes_[prev_node_id].parent_id; x != node_id; + x = nodes_[x].parent_id) { + nodes_[x].subtree_count[kCountStar] -= prev_node.value_count; + nodes_[x].subtree_count[kCountDistinctValue] -= 1; + } + + node->value = prev_node.value; + node->value_count = prev_node.value_count; + node->subtree_count[kCountStar] -= 1; + node->subtree_count[kCountDistinctValue] -= 1; + + node_id = prev_node_id; + parent_id = prev_node.parent_id; + node = &nodes_[node_id]; + } + + for (int side = 0; side < 2; ++side) { + if (node->child_id[side] == kNilId) { + nodes_[parent_id].child_id[parent_side] = node->child_id[1 - side]; + nodes_[node->child_id[1 - side]].parent_id = parent_id; + if (parent_id == kNilId) { + root_id_ = node->child_id[1 - side]; + } else { + Splay(parent_id); + } + DeallocateNode(node_id); + +#ifndef NDEBUG + ValidateTree(); +#endif + + return; + } + } +} + +void SplayTree::Clear() { + nodes_.clear(); + empty_slots_.clear(); + root_id_ = kNilId; + nodes_.push_back(NodeType()); + nodes_[kNilId].value_count = 0; + for (int i = 0; i < 2; ++i) { + nodes_[kNilId].subtree_count[i] = 0; + } +} + +// Value does not need to be present +int64_t SplayTree::Rank(bool ties_low, int64_t value) { + index_t rank; + index_t parent_id; + int parent_side; + index_t node_id; + Find(value, kCountStar, &parent_id, &parent_side, &node_id, &rank); 
+ if (ties_low || node_id == kNilId) { + return rank + 1; + } + return rank + nodes_[node_id].value_count; +} + +// Value does not need to be present +int64_t SplayTree::DenseRank(int64_t value) { + index_t rank; + index_t parent_id; + int parent_side; + index_t node_id; + Find(value, kCountDistinctValue, &parent_id, &parent_side, &node_id, &rank); + return rank + 1; +} + +SplayTree::index_t SplayTree::AllocateNode() { + index_t new_node_id; + if (empty_slots_.empty()) { + new_node_id = static_cast(nodes_.size()); + nodes_.push_back(NodeType()); + } else { + new_node_id = empty_slots_.back(); + empty_slots_.pop_back(); + } + return new_node_id; +} + +void SplayTree::DeallocateNode(index_t node_id) { empty_slots_.push_back(node_id); } + +void SplayTree::SwitchParent(index_t old_parent_id, int old_child_side, + index_t new_parent_id, int new_child_side) { + NodeType& old_parent = nodes_[old_parent_id]; + NodeType& new_parent = nodes_[new_parent_id]; + index_t child_id = old_parent.child_id[old_child_side]; + NodeType& child = nodes_[child_id]; + index_t replaced_child_id = new_parent.child_id[new_child_side]; + NodeType& replaced_child = nodes_[replaced_child_id]; + + // New parent cannot be a child of old parent. 
+ ARROW_DCHECK(new_parent.parent_id != old_parent_id); + + child.parent_id = new_parent_id; + replaced_child.parent_id = kNilId; + new_parent.child_id[new_child_side] = child_id; + old_parent.child_id[old_child_side] = kNilId; + + for (int i = 0; i < 2; ++i) { + new_parent.subtree_count[i] += + child.subtree_count[i] - replaced_child.subtree_count[i]; + old_parent.subtree_count[i] -= child.subtree_count[i]; + } +} + +// parent node | +// / \ / \ | +// node y --> x parent | +// / \ / \ | +// x mid mid y | +void SplayTree::Zig(index_t node_id, index_t parent_id, int parent_side) { + NodeType& node = nodes_[node_id]; + NodeType& parent = nodes_[parent_id]; + + // zig is only called when parent is the root of the tree + // + ARROW_DCHECK(parent.parent_id == kNilId); + + // Rearrange tree nodes + // + SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); + + // At this point we have: | + // | + // nil nil | + // | | | + // node + parent | + // / \ / \ | + // x nil mid y | + // + + // Connect parent to node + // + node.child_id[1 - parent_side] = parent_id; + parent.parent_id = node_id; + for (int i = 0; i < 2; ++i) { + node.subtree_count[i] += parent.subtree_count[i]; + } + root_id_ = node_id; +} + +// grandparent node | +// / \ / \ | +// parent y x parent | +// / \ --> / \ | +// node mid1 mid0 grandparent | +// / \ / \ | +// x mid0 mid1 y | +void SplayTree::ZigZig(index_t node_id, index_t parent_id, index_t grandparent_id, + int parent_side) { + NodeType& node = nodes_[node_id]; + NodeType& parent = nodes_[parent_id]; + NodeType& grandparent = nodes_[grandparent_id]; + + // Rearrange tree nodes + // + SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); + SwitchParent(parent_id, 1 - parent_side, grandparent_id, parent_side); + + // At this point we have: | + // | + // nil nil z | + // | | | | + // node + parent + grandparent | + // / \ / \ / \ | + // x nil mid0 nil mid1 y | + // + + // Connect grandparent to parent + // + node.parent_id = 
grandparent.parent_id; + parent.child_id[1 - parent_side] = grandparent_id; + grandparent.parent_id = parent_id; + for (int i = 0; i < 2; ++i) { + parent.subtree_count[i] += grandparent.subtree_count[i]; + } + + // Connect parent to node + // + node.child_id[1 - parent_side] = parent_id; + parent.parent_id = node_id; + for (int i = 0; i < 2; ++i) { + node.subtree_count[i] += parent.subtree_count[i]; + } + if (root_id_ == grandparent_id) { + root_id_ = node_id; + } +} + +// grandparent node | +// / \ / \ | +// parent y parent grandparent | +// / \ --> /\ / \ | +// x node x mid0 mid1 y | +// / \ | +// mid0 mid1 | +void SplayTree::ZigZag(index_t node_id, index_t parent_id, index_t grandparent_id, + int parent_side, int grandparent_side) { + NodeType& node = nodes_[node_id]; + NodeType& parent = nodes_[parent_id]; + NodeType& grandparent = nodes_[grandparent_id]; + + // Rearrange tree nodes + // + SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); + SwitchParent(node_id, parent_side, grandparent_id, 1 - parent_side); + + // At this point we have: | + // | + // nil nil z | + // | | | | + // node + parent + grandparent | + // / \ / \ / \ | + // nil nil x mid0 mid1 y | + // | + + // Connect parent and grandparent to node + // + node.parent_id = grandparent.parent_id; + node.child_id[1 - parent_side] = parent_id; + node.child_id[parent_side] = grandparent_id; + parent.parent_id = node_id; + grandparent.parent_id = node_id; + for (int i = 0; i < 2; ++i) { + node.subtree_count[i] += parent.subtree_count[i] + grandparent.subtree_count[i]; + } + if (root_id_ == grandparent_id) { + root_id_ = node_id; + } +} + +void SplayTree::Splay(index_t node_id) { + for (;;) { + NodeType& node = nodes_[node_id]; + index_t parent_id = node.parent_id; + if (parent_id == kNilId) { + break; + } + NodeType& parent = nodes_[parent_id]; + int parent_side = (parent.child_id[0] == node_id ? 
0 : 1); + index_t grandparent_id = parent.parent_id; + if (grandparent_id == kNilId) { + Zig(node_id, parent_id, parent_side); + continue; + } + NodeType& grandparent = nodes_[grandparent_id]; + int grandparent_side = (grandparent.child_id[0] == parent_id ? 0 : 1); + if (parent_side == grandparent_side) { + ZigZig(node_id, parent_id, grandparent_id, parent_side); + } else { + ZigZag(node_id, parent_id, grandparent_id, parent_side, grandparent_side); + } + } +} + +// Find the node with the given value if exists. +// Otherwise find the place in the tree where the new value would be +// inserted (its parent and parent's child index). +// +void SplayTree::Find(int64_t value, int counter_id, index_t* parent_id, int* parent_side, + index_t* node_id, index_t* count_less) const { + *parent_id = kNilId; + *parent_side = 0; + *count_less = 0; + + *node_id = root_id_; + for (;;) { + if (*node_id == kNilId) { + return; + } + const NodeType& node = nodes_[*node_id]; + const NodeType& left_child = nodes_[node.child_id[0]]; + if (value == node.value) { + *count_less += left_child.subtree_count[counter_id]; + return; + } + int direction = value < node.value ? 0 : 1; + if (direction == 1) { + *count_less += left_child.subtree_count[counter_id] + + (counter_id == kCountStar ? 
node.value_count : 1); + } + *parent_id = *node_id; + *parent_side = direction; + *node_id = node.child_id[direction]; + } +} + +void SplayTree::ValidateVisit(index_t node_id, index_t* count, index_t* count_distinct) { + ARROW_DCHECK(node_id != kNilId); + *count = nodes_[node_id].value_count; + *count_distinct = 1; + for (int side = 0; side < 2; ++side) { + if (nodes_[node_id].child_id[side] != kNilId) { + index_t count_child, count_distinct_child; + ValidateVisit(nodes_[node_id].child_id[side], &count_child, &count_distinct_child); + *count += count_child; + *count_distinct += count_distinct_child; + } + } + ARROW_DCHECK(*count == nodes_[node_id].subtree_count[kCountStar]); + ARROW_DCHECK(*count_distinct == nodes_[node_id].subtree_count[kCountDistinctValue]); +} + +void SplayTree::ValidateTree() { + index_t count = 0; + index_t count_distinct = 0; + if (root_id_ != kNilId) { + ValidateVisit(root_id_, &count, &count_distinct); + } + ARROW_DCHECK(nodes_.size() == empty_slots_.size() + count + /*extra one for kNilId*/ 1); +} + +template +int SplayTree::Print_StrLen(const T& value) { + std::string s = std::to_string(value); + return static_cast(s.length()); +} + +std::string SplayTree::Print_Label(index_t node_id) const { + const NodeType& node = nodes_[node_id]; + return std::string("(") + std::to_string(node.value) + "," + + std::to_string(node.value_count) + "," + std::to_string(node.subtree_count[0]) + + "," + std::to_string(node.subtree_count[1]) + ")"; +} + +void SplayTree::Print_BoxWH(index_t node_id, std::map& boxes) { + // Recursively compute box size for left and right child if they exist + // + bool has_child[2]; + for (int ichild = 0; ichild < 2; ++ichild) { + has_child[ichild] = (nodes_[node_id].child_id[ichild] != kNilId); + if (has_child[ichild]) { + Print_BoxWH(nodes_[node_id].child_id[ichild], boxes); + } + } + + PrintBox box; + int label_size = static_cast(Print_Label(node_id).length()); + + if (!has_child[0] && !has_child[1] == 0) { + box.root_x = 
0; + box.w = label_size; + box.h = 1; + } else if (has_child[0] && has_child[1]) { + // Both children + PrintBox left_box = boxes.find(nodes_[node_id].child_id[0])->second; + PrintBox right_box = boxes.find(nodes_[node_id].child_id[1])->second; + box.w = left_box.w + right_box.w + 1; + box.h = std::max(left_box.h, right_box.h) + 4; + int mid = (left_box.w + right_box.w + 1) / 2; + box.root_x = + std::min(std::max(mid, left_box.root_x), left_box.w + 1 + right_box.root_x); + box.w = std::max(box.w, box.root_x + label_size); + } else { + // One child + int ichild = (has_child[0] ? 0 : 1); + PrintBox child_box = boxes.find(nodes_[node_id].child_id[ichild])->second; + box.h = child_box.h + 4; + box.w = child_box.w; + box.root_x = box.w / 2; + box.w = std::max(box.w, box.root_x + label_size); + } + + boxes.insert(std::make_pair(node_id, box)); +} + +void SplayTree::Print_BoxXY(int x, int y, index_t node_id, + std::map& boxes) { + PrintBox& box = boxes.find(node_id)->second; + box.x += x; + box.y += y; + bool has_child[2]; + for (int ichild = 0; ichild < 2; ++ichild) { + has_child[ichild] = (nodes_[node_id].child_id[ichild] != kNilId); + } + if (has_child[0] && has_child[1]) { + Print_BoxXY(x, y + 4, nodes_[node_id].child_id[0], boxes); + Print_BoxXY(x + boxes.find(nodes_[node_id].child_id[0])->second.w + 1, y + 4, + nodes_[node_id].child_id[1], boxes); + } else if (has_child[0] || has_child[1]) { + Print_BoxXY(x, y + 4, nodes_[node_id].child_id[has_child[0] ? 
0 : 1], boxes); + } +} + +void SplayTree::Print_PutChar(std::vector>& canvas, int x, int y, + char c) { + if (y >= static_cast(canvas.size())) { + canvas.resize(y + 1); + } + if (x >= static_cast(canvas[y].size())) { + canvas[y].resize(x + 1); + } + canvas[y][x] = c; +} + +void SplayTree::Print_PutString(std::vector>& canvas, int x, int y, + std::string str) { + for (size_t i = 0; i < str.length(); ++i) { + Print_PutChar(canvas, x, y, str[i]); + } +} + +void SplayTree::Print_Node(index_t node_id, std::map& boxes, + std::vector>& canvas) { + PrintBox box = boxes.find(node_id)->second; + Print_PutString(canvas, box.root_x, box.y, Print_Label(node_id)); + for (int ichild = 0; ichild < 2; ++ichild) { + if (nodes_[node_id].child_id[ichild] != kNilId) { + PrintBox child_box = boxes.find(nodes_[node_id].child_id[ichild])->second; + int top_x = child_box.root_x; + int bottom_x = box.root_x + ichild; + Print_PutChar(canvas, top_x, box.y + 3, '|'); + for (int x = std::min(bottom_x, top_x); x <= std::max(bottom_x, top_x); ++x) { + Print_PutChar(canvas, x, box.y + 2, '-'); + } + Print_PutChar(canvas, bottom_x, box.y + 1, '|'); + Print_Node(nodes_[node_id].child_id[ichild], boxes, canvas); + } + } +} + +void SplayTree::Print() { + if (root_id_ == kNilId) { + return; + } + std::map boxes; + Print_BoxWH(root_id_, boxes); + Print_BoxXY(0, 0, root_id_, boxes); + std::vector> canvas; + Print_Node(root_id_, boxes, canvas); + + const char* filename = "splay_tree_output.txt"; + FILE* fout; +#if defined(_MSC_VER) && _MSC_VER >= 1400 + fopen_s(&fout, filename, "wt"); +#else + fout = fopen(filename, "wt"); +#endif + + for (size_t y = 0; y < canvas.size(); ++y) { + for (size_t x = 0; x < canvas[y].size(); ++x) { + fprintf(fout, "%c", canvas[y][x]); + } + fprintf(fout, "\n"); + } + fclose(fout); +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/splay_tree.h b/cpp/src/arrow/compute/exec/window_functions/splay_tree.h new file mode 
100644 index 00000000000..604ca4cf91a --- /dev/null +++ b/cpp/src/arrow/compute/exec/window_functions/splay_tree.h @@ -0,0 +1,139 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include "arrow/compute/exec/util.h" // for ARROW_DCHECK + +namespace arrow { +namespace compute { + +class SplayTree { + public: + using index_t = int32_t; + + SplayTree(); + + void Insert(int64_t value); + + void Remove(int64_t value); + + void Clear(); + + // Value does not need to be present + int64_t Rank(bool ties_low, int64_t value); + + // Value does not need to be present + int64_t DenseRank(int64_t value); + + private: + static constexpr index_t kNilId = 0; + static constexpr int kCountStar = 0; + static constexpr int kCountDistinctValue = 1; + + struct NodeType { + int64_t value; + index_t value_count; + + index_t subtree_count[2]; + + index_t parent_id; + index_t child_id[2]; + }; + + std::vector nodes_; + index_t root_id_; + std::vector empty_slots_; + + index_t AllocateNode(); + + void DeallocateNode(index_t node_id); + + void SwitchParent(index_t old_parent_id, int old_child_side, index_t new_parent_id, + int new_child_side); + + // parent node | + // / \ / \ | + // 
node y --> x parent | + // / \ / \ | + // x mid mid y | + void Zig(index_t node_id, index_t parent_id, int parent_side); + + // grandparent node | + // / \ / \ | + // parent y x parent | + // / \ --> / \ | + // node mid1 mid0 grandparent | + // / \ / \ | + // x mid0 mid1 y | + void ZigZig(index_t node_id, index_t parent_id, index_t grandparent_id, + int parent_side); + + // grandparent node | + // / \ / \ | + // parent y parent grandparent | + // / \ --> /\ / \ | + // x node x mid0 mid1 y | + // / \ | + // mid0 mid1 | + void ZigZag(index_t node_id, index_t parent_id, index_t grandparent_id, int parent_side, + int grandparent_side); + + void Splay(index_t node_id); + + // Find the node with the given value if exists. + // Otherwise find the place in the tree where the new value would be + // inserted (its parent and parent's child index). + // + void Find(int64_t value, int counter_id, index_t* parent_id, int* parent_side, + index_t* node_id, index_t* count_less) const; + + void ValidateVisit(index_t node_id, index_t* count, index_t* count_distinct); + + void ValidateTree(); + + template + static int Print_StrLen(const T& value); + + struct PrintBox { + int x, y, w, h; + int root_x; + }; + + std::string Print_Label(index_t node_id) const; + + void Print_BoxWH(index_t node_id, std::map& boxes); + + void Print_BoxXY(int x, int y, index_t node_id, std::map& boxes); + + void Print_PutChar(std::vector>& canvas, int x, int y, char c); + + void Print_PutString(std::vector>& canvas, int x, int y, + std::string str); + + void Print_Node(index_t node_id, std::map& boxes, + std::vector>& canvas); + + void Print(); +}; + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/window_frame.h b/cpp/src/arrow/compute/exec/window_functions/window_frame.h new file mode 100644 index 00000000000..4e507eb0f3e --- /dev/null +++ b/cpp/src/arrow/compute/exec/window_functions/window_frame.h @@ -0,0 +1,143 @@ +// Licensed to the Apache Software 
Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include "arrow/compute/exec/util.h" + +namespace arrow { +namespace compute { + +struct WindowFrames { + static constexpr int kMaxRangesInFrame = 3; + + int num_ranges_in_frame; + int64_t num_frames; + + // Range can be empty, in that case begin == end. Otherwise begin < end. + // + // Ranges in a single frame must be disjoint but begin of next range can be + // equal to the end of the previous one. + // + const int64_t* begins[kMaxRangesInFrame]; + const int64_t* ends[kMaxRangesInFrame]; + + // Row filter has bits set to 0 for rows that should not be included in the + // range. + // + // Null row filter means that all rows are qualified. 
+  //
+  const uint8_t* row_filter;
+
+  // Returns true if, for every range of the frame, neither the begin nor the
+  // end ever moves backwards from one frame to the next.
+  //
+  // begins / ends are indexed first by range and then by frame, so the
+  // comparison is done per range on the boundary values (not on the array
+  // pointers themselves).
+  //
+  bool FramesProgressing() const {
+    for (int irange = 0; irange < num_ranges_in_frame; ++irange) {
+      const int64_t* range_begins = begins[irange];
+      const int64_t* range_ends = ends[irange];
+      for (int64_t i = 1; i < num_frames; ++i) {
+        if (!(range_begins[i] >= range_begins[i - 1] &&
+              range_ends[i] >= range_ends[i - 1])) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  // Returns true if, for every range of the frame, each frame either starts
+  // at or after the end of the previous frame or shares the previous frame's
+  // begin, and its end never moves backwards.
+  //
+  bool FramesExpanding() const {
+    for (int irange = 0; irange < num_ranges_in_frame; ++irange) {
+      const int64_t* range_begins = begins[irange];
+      const int64_t* range_ends = ends[irange];
+      for (int64_t i = 1; i < num_frames; ++i) {
+        if (!((range_begins[i] >= range_ends[i - 1] ||
+               range_begins[i] == range_begins[i - 1]) &&
+              (range_ends[i] >= range_ends[i - 1]))) {
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+};
+
+// Fills begins / ends with one randomly generated frame per row.
+//
+// - neither flag set: arbitrary frames of length at most 100;
+// - progressive only: begins and ends are both non-decreasing;
+// - otherwise: rows are split into random partitions and within a partition
+//   the begin is fixed while the end only grows.
+//
+inline void GenerateTestFrames(Random64BitCopy& rand, int64_t num_rows,
+                               std::vector<int64_t>& begins, std::vector<int64_t>& ends,
+                               bool progressive, bool expansive) {
+  begins.resize(num_rows);
+  ends.resize(num_rows);
+
+  if (!progressive && !expansive) {
+    constexpr int64_t max_frame_length = 100;
+    for (int64_t i = 0; i < num_rows; ++i) {
+      int64_t length =
+          rand.from_range(static_cast<int64_t>(0), std::min(num_rows, max_frame_length));
+      int64_t begin = rand.from_range(static_cast<int64_t>(0), num_rows - length);
+      begins[i] = begin;
+      ends[i] = begin + length;
+    }
+  } else if (progressive && !expansive) {
+    int64_t dist = rand.from_range(static_cast<int64_t>(1),
+                                   std::max(static_cast<int64_t>(1), num_rows / 4));
+    std::vector<int64_t> pos;
+    for (int64_t i = 0; i < num_rows + dist; ++i) {
+      pos.push_back(rand.from_range(static_cast<int64_t>(0), num_rows));
+    }
+    // Sorted positions taken dist apart give non-decreasing begins and ends.
+    std::sort(pos.begin(), pos.end());
+    for (int64_t i = 0; i < num_rows; ++i) {
+      begins[i] = pos[i];
+      ends[i] = pos[i + dist];
+    }
+  } else {
+    int64_t num_partitions =
+        rand.from_range(static_cast<int64_t>(1), bit_util::CeilDiv(num_rows, 128LL));
+    std::set<int64_t> partition_ends_set;
+    std::vector<int64_t> partition_ends;
+    partition_ends_set.insert(num_rows);
+    partition_ends.push_back(num_rows);
+    for (int64_t i = 1; i < num_partitions; ++i) {
+      int64_t partition_end;
+      // Retry until the boundary is distinct from all previously picked ones.
+      for (;;) {
+        partition_end = rand.from_range(static_cast<int64_t>(1), num_rows - 1);
+        if (partition_ends_set.find(partition_end) == partition_ends_set.end()) {
+          break;
+        }
+      }
+      partition_ends.push_back(partition_end);
+      partition_ends_set.insert(partition_end);
+    }
+
std::sort(partition_ends.begin(), partition_ends.end()); + for (int64_t ipartition = 0; ipartition < num_partitions; ++ipartition) { + int64_t partition_begin = ipartition == 0 ? 0LL : partition_ends[ipartition - 1]; + int64_t partition_end = partition_ends[ipartition]; + int64_t partition_length = partition_end - partition_begin; + int64_t begin = rand.from_range(0LL, 2LL); + + if (begin >= partition_length) { + begin = partition_length - 1; + } + int64_t end = begin + rand.from_range(0LL, 2LL); + if (end > partition_length) { + end = partition_length; + } + begins[partition_begin + 0] = partition_begin + begin; + ends[partition_begin + 0] = partition_begin + end; + for (int64_t i = 1; i < partition_length; ++i) { + int64_t end_step = rand.from_range(0LL, 2LL); + end += end_step; + if (end > partition_length) { + end = partition_length; + } + begins[partition_begin + i] = partition_begin + begin; + ends[partition_begin + i] = partition_begin + end; + } + } + } +} + +} // namespace compute +} // namespace arrow \ No newline at end of file diff --git a/cpp/src/arrow/compute/exec/window_functions/window_rank.cc b/cpp/src/arrow/compute/exec/window_functions/window_rank.cc new file mode 100644 index 00000000000..ebd6f8123fc --- /dev/null +++ b/cpp/src/arrow/compute/exec/window_functions/window_rank.cc @@ -0,0 +1,821 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/exec/window_functions/window_rank.h" + +namespace arrow { +namespace compute { + +class GroupPrevRankCalculator { + public: + GroupPrevRankCalculator(int64_t num_rows, const int64_t* group_ids_sorted, + const int64_t* permutation) { + int64_t num_bit_words = bit_util::CeilDiv(num_rows, 64); + + last_in_group_bitvec_.resize(num_bit_words); + memset(last_in_group_bitvec_.data(), 0, num_bit_words * sizeof(uint64_t)); + for (int64_t i = 0; i < num_rows; ++i) { + bool last_in_group = + (i == (num_rows - 1) || group_ids_sorted[i + 1] != group_ids_sorted[i]); + if (last_in_group) { + bit_util::SetBit(reinterpret_cast(last_in_group_bitvec_.data()), + permutation[i]); + } + } + last_in_group_popcounts_.resize(num_bit_words); + BitVectorNavigator::GenPopCounts(num_rows, last_in_group_bitvec_.data(), + last_in_group_popcounts_.data()); + + num_rows_ = num_rows; + num_groups_ = BitVectorNavigator::PopCount(num_rows, last_in_group_bitvec_.data(), + last_in_group_popcounts_.data()); + + first_in_group_bitvec_.resize(num_bit_words); + memset(first_in_group_bitvec_.data(), 0, num_bit_words * sizeof(uint64_t)); + for (int64_t i = 0; i < num_rows; ++i) { + bool first_in_group = (i == 0 || group_ids_sorted[i - 1] != group_ids_sorted[i]); + if (first_in_group) { + bit_util::SetBit(reinterpret_cast(first_in_group_bitvec_.data()), + permutation[i]); + } + } + first_in_group_popcounts_.resize(num_bit_words); + BitVectorNavigator::GenPopCounts(num_rows, first_in_group_bitvec_.data(), + first_in_group_popcounts_.data()); + } + + // Prev is 
0-based row number + // Returns 0-based row number + int64_t Rank(int64_t row_number, int64_t prev) { + if (bit_util::GetBit(reinterpret_cast(first_in_group_bitvec_.data()), + row_number)) { + return BitVectorNavigator::Rank(row_number, first_in_group_bitvec_.data(), + first_in_group_popcounts_.data()); + } else { + return num_groups_ + prev - + BitVectorNavigator::Rank(prev, last_in_group_bitvec_.data(), + last_in_group_popcounts_.data()); + } + } + // Prev is 0-based row number + // Returns 0-based row number + int64_t RankEnd(int64_t prev_end) { + if (prev_end == num_rows_) { + return num_rows_; + } + return num_groups_ + prev_end - + BitVectorNavigator::Rank(prev_end, last_in_group_bitvec_.data(), + last_in_group_popcounts_.data()); + } + + private: + int64_t num_rows_; + int64_t num_groups_; + std::vector last_in_group_bitvec_; + std::vector last_in_group_popcounts_; + std::vector first_in_group_bitvec_; + std::vector first_in_group_popcounts_; +}; + +void WindowRank::Global(RankType rank_type, int64_t num_rows, const uint64_t* ties_bitvec, + const uint64_t* ties_popcounts, int64_t* output, + int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack) { + switch (rank_type) { + case RankType::RANK_TIES_LOW: + case RankType::RANK_TIES_HIGH: + GlobalRank(rank_type == RankType::RANK_TIES_LOW, num_rows, ties_bitvec, + ties_popcounts, output, hardware_flags, temp_vector_stack); + break; + case RankType::DENSE_RANK: + GlobalDenseRank(num_rows, ties_bitvec, ties_popcounts, output); + break; + case RankType::ROW_NUMBER: + GlobalRowNumber(num_rows, output); + break; + } +} + +void WindowRank::WithinFrame(RankType rank_type, int64_t num_rows, + const uint64_t* ties_bitvec, const uint64_t* ties_popcounts, + const int64_t* frame_begins, const int64_t* frame_ends, + int64_t* output, int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack) { + switch (rank_type) { + case RankType::RANK_TIES_LOW: + case RankType::RANK_TIES_HIGH: + RankWithinFrame(rank_type 
== RankType::RANK_TIES_LOW, num_rows, ties_bitvec, + ties_popcounts, frame_begins, frame_ends, output, hardware_flags, + temp_vector_stack); + break; + case RankType::DENSE_RANK: + DenseRankWithinFrame(num_rows, ties_bitvec, ties_popcounts, frame_begins, + frame_ends, output); + break; + case RankType::ROW_NUMBER: + RowNumberWithinFrame(num_rows, frame_begins, frame_ends, output); + break; + } +} + +void WindowRank::OnSeparateAttribute(RankType rank_type, int64_t num_rows, + const int64_t* global_ranks_sorted, + const int64_t* permutation, bool progressive_frames, + const int64_t* frame_begins, + const int64_t* frame_ends, int64_t* output, + int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack) { + switch (rank_type) { + case RankType::ROW_NUMBER: + case RankType::RANK_TIES_LOW: + case RankType::RANK_TIES_HIGH: + if (!progressive_frames) { + SeparateAttributeRank(rank_type == RankType::RANK_TIES_LOW, num_rows, + frame_begins, frame_ends, global_ranks_sorted, permutation, + output, hardware_flags, temp_vector_stack); + } else { + ProgressiveSeparateAttributeRank( + /*dense_rank=*/false, rank_type == RankType::RANK_TIES_LOW, num_rows, + frame_begins, frame_ends, global_ranks_sorted, output); + } + break; + case RankType::DENSE_RANK: + if (!progressive_frames) { + SeparateAttributeDenseRank(num_rows, frame_begins, frame_ends, + global_ranks_sorted, permutation, output, + hardware_flags, temp_vector_stack); + } else { + ProgressiveSeparateAttributeRank( + /*dense_rank=*/true, false, num_rows, frame_begins, frame_ends, + global_ranks_sorted, output); + } + break; + } +} + +void WindowRank::GlobalRank(bool ties_low, int64_t num_rows, const uint64_t* bitvec, + const uint64_t* popcounts, int64_t* output, + int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack) { + // Range of indices for groups of ties in entire input + int64_t rank_begin = 0; + int64_t rank_end = BitVectorNavigator::PopCount(num_rows, bitvec, popcounts); + + // Break groups of 
ties into minibatches + int64_t minibatch_length_max = util::MiniBatch::kMiniBatchLength - 1; + auto selects_buf = util::TempVectorHolder( + temp_vector_stack, static_cast(minibatch_length_max + 1)); + auto selects = selects_buf.mutable_data(); + for (int64_t minibatch_begin = rank_begin; minibatch_begin < rank_end; + minibatch_begin += minibatch_length_max) { + int64_t minibatch_end = std::min(rank_end, minibatch_begin + minibatch_length_max); + + // Get first (and last) row number for each group of ties in minibatch + BitVectorNavigator::SelectsForRangeOfRanks(minibatch_begin, minibatch_end + 1, + num_rows, bitvec, popcounts, selects, + hardware_flags, temp_vector_stack); + if (ties_low) { + for (int64_t ties_group = 0; ties_group < minibatch_end - minibatch_begin; + ++ties_group) { + for (int64_t i = selects[ties_group]; i < selects[ties_group + 1]; ++i) { + output[i] = selects[ties_group] + 1; + } + } + } else { + for (int64_t ties_group = 0; ties_group < minibatch_end - minibatch_begin; + ++ties_group) { + for (int64_t i = selects[ties_group]; i < selects[ties_group + 1]; ++i) { + output[i] = selects[ties_group + 1]; + } + } + } + } +} + +void WindowRank::GlobalDenseRank(int64_t num_rows, const uint64_t* bitvec, + const uint64_t* popcounts, int64_t* output) { + for (int64_t i = 0; i < num_rows; ++i) { + output[i] = BitVectorNavigator::RankNext(i, bitvec, popcounts); + } +} + +void WindowRank::GlobalRowNumber(int64_t num_rows, int64_t* output) { + std::iota(output, output + num_rows, 1LL); +} + +void WindowRank::RankWithinFrame(bool ties_low, int64_t num_rows, const uint64_t* bitvec, + const uint64_t* popcounts, const int64_t* frame_begins, + const int64_t* frame_ends, int64_t* output, + int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack) { + GlobalRank(ties_low, num_rows, bitvec, popcounts, output, hardware_flags, + temp_vector_stack); + for (int64_t i = 0; i < num_rows; ++i) { + // If the frame does not contain current row it is still 
logically + // considered as included in the frame (e.g. empty frame will yield rank + // 1 since the set we look at consists of a single row - current row). + + // The case of an empty frame + if (frame_begins[i] >= frame_ends[i]) { + output[i] = 1; + continue; + } + + bool tie_with_first = + BitVectorNavigator::RankNext(i, bitvec, popcounts) == + BitVectorNavigator::RankNext(frame_begins[i], bitvec, popcounts); + bool tie_with_last = + BitVectorNavigator::RankNext(i, bitvec, popcounts) == + BitVectorNavigator::RankNext(frame_ends[i] - 1, bitvec, popcounts); + if (!tie_with_first) { + if (i < frame_begins[i]) { + output[i] = 1; + } else if (i >= frame_ends[i]) { + if (tie_with_last) { + output[i] -= frame_begins[i]; + } else { + output[i] = frame_ends[i] - frame_begins[i] + 1; + } + } else { + output[i] -= frame_begins[i]; + } + } else { + if (tie_with_last) { + output[i] = 1; + } else { + // Bit vector rank of current row is the same as the beginning of + // the frame but different than for the last row of the frame, which + // means that current row must appear before the last row of the + // frame. 
+ // + ARROW_DCHECK(i < frame_ends[i]); + if (ties_low) { + output[i] = 1; + } else { + if (i < frame_begins[i]) { + output[i] -= frame_begins[i] - 1; + } else { + output[i] -= frame_begins[i]; + } + } + } + } + } +} + +void WindowRank::DenseRankWithinFrame(int64_t num_rows, const uint64_t* bitvec, + const uint64_t* popcounts, + const int64_t* frame_begins, + const int64_t* frame_ends, int64_t* output) { + for (int64_t i = 0; i < num_rows; ++i) { + if (frame_begins[i] >= frame_ends[i]) { + output[i] = 1; + continue; + } + + if (i < frame_begins[i]) { + output[i] = 1; + continue; + } + if (i >= frame_ends[i]) { + bool tie_with_last = + BitVectorNavigator::RankNext(i, bitvec, popcounts) == + BitVectorNavigator::RankNext(frame_ends[i] - 1, bitvec, popcounts); + output[i] = BitVectorNavigator::RankNext(frame_ends[i] - 1, bitvec, popcounts) - + BitVectorNavigator::RankNext(frame_begins[i], bitvec, popcounts) + 1 + + (tie_with_last ? 0 : 1); + continue; + } + + output[i] = BitVectorNavigator::RankNext(i, bitvec, popcounts) - + BitVectorNavigator::RankNext(frame_begins[i], bitvec, popcounts) + 1; + } +} + +void WindowRank::RowNumberWithinFrame(int64_t num_rows, const int64_t* frame_begins, + const int64_t* frame_ends, int64_t* output) { + for (int64_t i = 0; i < num_rows; ++i) { + if (frame_begins[i] >= frame_ends[i]) { + output[i] = 1; + continue; + } + + if (i < frame_begins[i]) { + output[i] = 1; + continue; + } + + if (i >= frame_ends[i]) { + output[i] = frame_ends[i] - frame_begins[i] + 1; + continue; + } + + output[i] = i - frame_begins[i] + 1; + } +} + +void WindowRank::SeparateAttributeRank( + bool ties_low, + /* number of rows and number of frames */ + int64_t num_rows, const int64_t* begins, const int64_t* ends, + /* Sorted (in ascending order) ranks (with respect to ranking attribute) + for all rows */ + /* null can be passed if all ranks are distinct (in which case the + sorted array would just contain sequence of integers from 1 to num_rows). 
+     Supplying null changes the semantics from rank to row number. */
+    const int64_t* ranks_sorted,
+    /* Permutation of row numbers that results in sortedness on ranking
+       attribute */
+    const int64_t* permutation, int64_t* output, int64_t hardware_flags,
+    util::TempVectorStack* temp_vector_stack) {
+  // Build merge tree
+  //
+  MergeTree merge_tree;
+  merge_tree.Build(num_rows, permutation,
+                   /* number of top levels to skip */ 0, hardware_flags,
+                   temp_vector_stack);
+
+  // Ties low means outputting the number of rows in window frame with rank
+  // lower than current row plus 1. Initialize output counter accordingly.
+  //
+  int64_t delta = ties_low ? 1 : 0;
+  std::fill_n(output, num_rows, delta);
+
+  // For each row compute the number of rows with the lower rank (lower or
+  // equal in case of ties high).
+  //
+  // This will be used as an upper bound on rank attribute when querying
+  // merge tree.
+  //
+  // A ties-low rank is (number of rows with lower rank) + 1, so the "+ 1"
+  // has to be removed to get the exclusive bound; a ties-high rank already
+  // equals the number of rows with lower or equal rank.
+  //
+  std::vector<int64_t> y_ends(num_rows);
+  for (int64_t i = 0; i < num_rows; ++i) {
+    y_ends[permutation[i]] = (ranks_sorted ? ranks_sorted[i] : (i + 1)) - delta;
+  }
+
+  BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, num_rows)
+
+  // The callback adds (y_end - node_begin) to the output of the query it is
+  // invoked for.
+  merge_tree.MiniBatchRangeQuery(batch_length, begins + batch_begin, ends + batch_begin,
+                                 y_ends.data() + batch_begin, temp_vector_stack,
+                                 [&](int64_t iquery, int64_t node_begin, int64_t y_end) {
+                                   output[batch_begin + iquery] += y_end - node_begin;
+                                 });
+
+  END_MINI_BATCH_FOR
+}
+
+void WindowRank::SeparateAttributeDenseRank(
+    int64_t num_rows, const int64_t* begins, const int64_t* ends,
+    /* The following two arrays must be the result of sorting rows on
+       (global dense rank, row number within window) pairs. Within a group of
+       peers with that same dense ranks rows must come in the order in which
+       they appeared in the window. This could be accomplished by a stable
+       sort of window on the dense rank value.
+ */ + const int64_t* global_dense_ranks_sorted, const int64_t* permutation, int64_t* output, + int64_t hardware_flags, util::TempVectorStack* temp_vector_stack) { + // Mapping to the coordinates (x, y, z) used by range tree is described + // below. + // + // Definitions are picked so that for each attribute every row has a + // distinct coordinate in the range [0, num_rows). + // + // The coordinates correspond to position in the sorted array for + // different sort orders: x - sorting on previous occurrence of the row + // with the same global dense rank y - window ordering z - sorting on + // global dense rank + // + // x: + // - for the first row in each dense rank group, dense rank minus 1, + // for other rows, number of rows preceding the previous row in the same + // dense rank group that are not the last in their dense rank groups. + // Alternative way of viewing this is the position in the array sorted on + // the following function: 1 plus position in the window sort order of the + // previous occurrence of the row with the same dense rank (current row's + // peer in the global dense rank group) or 0 if there is no previous + // occurrence. + // - exclusive upper bound for this attribute in the range query for ith + // frame is: number of dense rank groups plus the number of rows preceding + // begins[i] in the window order that are not the last in their respective + // dense rank groups. + // Alternative way of viewing this is std::lower_bound for begins[i] + 1 + // in the sorted array introduced in the description of an alternative + // view of x attribute. 
+ // + // y: + // - row number in the window sort order (sort order used to + // compute frame boundaries) + // - range query uses begins[i], ends[i] as a range filter on this + // attribute for ith frame + // + // z: + // - position in the array sorted on global dense rank + // - range query uses number of rows with global dense rank less than that + // of the current row of ith frame + // + GroupPrevRankCalculator x_calc(num_rows, global_dense_ranks_sorted, permutation); + + RangeTree tree; + { + std::vector x_sorted_on_z(num_rows); + for (int64_t i = 0; i < num_rows; ++i) { + bool has_next = (i < num_rows - 1) && + (global_dense_ranks_sorted[i] == global_dense_ranks_sorted[i + 1]); + if (has_next) { + int64_t y = permutation[i + 1]; + int64_t z = i + 1; + int64_t prev = permutation[i]; + int64_t x = x_calc.Rank(y, prev); + x_sorted_on_z[z] = x; + } + bool has_prev = + (i > 0) && (global_dense_ranks_sorted[i] == global_dense_ranks_sorted[i - 1]); + if (!has_prev) { + int64_t y = permutation[i]; + int64_t z = i; + int64_t x = x_calc.Rank(y, -1); + x_sorted_on_z[z] = x; + } + } + + const int64_t* y_sorted_on_z = permutation; + tree.Build(num_rows, x_sorted_on_z.data(), y_sorted_on_z, hardware_flags, + temp_vector_stack); + } + + // For each frame compute upper bound on z coordinate + // + std::vector z_ends(num_rows); + int64_t first_in_group; + for (int64_t i = 0; i < num_rows; ++i) { + bool is_first_in_group = + (i == 0) || global_dense_ranks_sorted[i - 1] != global_dense_ranks_sorted[i]; + if (is_first_in_group) { + first_in_group = i; + } + z_ends[permutation[i]] = first_in_group; + } + + TEMP_VECTOR(int64_t, x_ends); + + BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, num_rows) + + for (int64_t i = batch_begin; i < batch_begin + batch_length; ++i) { + x_ends[i - batch_begin] = x_calc.RankEnd(begins[i]); + } + + tree.BoxCount(batch_length, x_ends, begins + batch_begin, ends + batch_begin, + z_ends.data() + batch_begin, output + batch_begin, hardware_flags, + 
temp_vector_stack); + + // Output is 1 plus the number of rows satisfying range query + // + for (int64_t i = batch_begin; i < batch_begin + batch_length; ++i) { + ++output[i]; + } + + END_MINI_BATCH_FOR +} + +void WindowRank::ProgressiveSeparateAttributeRank(bool dense_rank, bool ties_low, + int64_t num_rows, const int64_t* begins, + const int64_t* ends, + const int64_t* global_ranks, + int64_t* output) { + if (dense_rank) { + ProgressiveSeparateAttributeRankImp(false, num_rows, begins, ends, global_ranks, + output); + } else { + ProgressiveSeparateAttributeRankImp(ties_low, num_rows, begins, ends, + global_ranks, output); + } +} + +template +void WindowRank::ProgressiveSeparateAttributeRankImp(bool ties_low, int64_t num_rows, + const int64_t* begins, + const int64_t* ends, + const int64_t* global_ranks, + int64_t* output) { + SplayTree tree; + int64_t begin = begins[0]; + int64_t end = begin; + + for (int64_t iframe = 0; iframe < num_rows; ++iframe) { + int64_t frame_begin = begins[iframe]; + int64_t frame_end = ends[iframe]; + ARROW_DCHECK(frame_begin >= begin && frame_end >= end); + + if (end <= frame_begin) { + tree.Clear(); + begin = end = frame_begin; + } + + while (begin < frame_begin) { + tree.Remove(global_ranks[begin++]); + ARROW_DCHECK(begin <= end); + } + while (frame_end > end) { + tree.Insert(global_ranks[end++]); + } + + if (T_DENSE_RANK) { + output[iframe] = tree.DenseRank(global_ranks[iframe]); + } else { + output[iframe] = tree.Rank(ties_low, global_ranks[iframe]); + } + } +} + +void WindowRankBasic::Global(RankType rank_type, int64_t num_rows, const uint64_t* bitvec, + int64_t* output) { + int64_t current_group_id; + int64_t first_in_group; + int64_t num_in_group; + for (int64_t i = 0; i < num_rows; ++i) { + if (i == 0) { + current_group_id = 0; + first_in_group = 0; + num_in_group = 1; + for (num_in_group = 1; first_in_group + num_in_group < num_rows; ++num_in_group) { + } + } else { + if (bit_util::GetBit(reinterpret_cast(bitvec), i)) { + 
++current_group_id;
+        first_in_group = i;
+      }
+    }
+    if (first_in_group == i) {
+      // A new group of ties starts at this row: count its rows from scratch
+      // by scanning forward to the next set bit (the start of the next
+      // group). Without the reset the size of an earlier group would leak
+      // into this one and RANK_TIES_HIGH would be wrong.
+      num_in_group = 1;
+      while (first_in_group + num_in_group < num_rows &&
+             !bit_util::GetBit(reinterpret_cast<const uint8_t*>(bitvec),
+                               first_in_group + num_in_group)) {
+        ++num_in_group;
+      }
+    }
+
+    switch (rank_type) {
+      case RankType::ROW_NUMBER:
+        output[i] = i + 1;
+        break;
+      case RankType::RANK_TIES_LOW:
+        output[i] = first_in_group + 1;
+        break;
+      case RankType::RANK_TIES_HIGH:
+        output[i] = first_in_group + num_in_group;
+        break;
+      case RankType::DENSE_RANK:
+        output[i] = current_group_id + 1;
+        break;
+    }
+  }
+}
+
+// Reference implementation of ranking within a frame: for every row the tie
+// bits of its frame are copied into a temporary bit vector and Global() is
+// run on that copy.
+//
+// bitvec has a bit set for every row that starts a new group of ties.
+//
+void WindowRankBasic::WithinFrame(RankType rank_type, int64_t num_rows,
+                                  const uint64_t* bitvec, const int64_t* frame_begins,
+                                  const int64_t* frame_ends, int64_t* output) {
+  for (int64_t i = 0; i < num_rows; ++i) {
+    int64_t begin = frame_begins[i];
+    int64_t end = frame_ends[i];
+    if (end == begin) {
+      output[i] = 1;
+      continue;
+    }
+    if (i < begin) {
+      // Current row precedes every row of the frame
+      output[i] = 1;
+      continue;
+    }
+    // One extra bit is reserved for the current row when it lies past the
+    // end of the frame.
+    int64_t num_words = bit_util::CeilDiv(end - begin + 1, 64);
+    std::vector<uint64_t> frame_bitvec(num_words);
+    memset(frame_bitvec.data(), 0, num_words * sizeof(uint64_t));
+    for (int64_t j = 0; j < end - begin; ++j) {
+      // Bit j of the frame copy corresponds to row begin + j of the input
+      if (bit_util::GetBit(reinterpret_cast<const uint8_t*>(bitvec), begin + j)) {
+        bit_util::SetBit(reinterpret_cast<uint8_t*>(frame_bitvec.data()), j);
+      }
+    }
+    bool one_more_group = false;
+    if (i >= end) {
+      // The current row forms a group of its own if any group starts between
+      // the end of the frame and the current row (inclusive).
+      for (int64_t j = end; j <= i; ++j) {
+        if (bit_util::GetBit(reinterpret_cast<const uint8_t*>(bitvec), j)) {
+          one_more_group = true;
+          bit_util::SetBit(reinterpret_cast<uint8_t*>(frame_bitvec.data()), end - begin);
+          break;
+        }
+      }
+    }
+    std::vector<int64_t> frame_output(end - begin + 1);
+    Global(rank_type, end - begin + (one_more_group ? 
1 : 0), frame_bitvec.data(), + frame_output.data()); + output[i] = frame_output[std::min(end, i) - begin]; + } +} + +void WindowRankBasic::SeparateAttribute(RankType rank_type, int64_t num_rows, + const int64_t* begins, const int64_t* ends, + const int64_t* global_ranks_sorted, + const int64_t* permutation, int64_t* output) { + if (num_rows == 0) { + return; + } + + std::vector inverse_permutation(num_rows); + for (int64_t i = 0; i < num_rows; ++i) { + inverse_permutation[permutation[i]] = i; + } + + for (int64_t i = 0; i < num_rows; ++i) { + int64_t begin = begins[i]; + int64_t end = ends[i]; + if (end == begin) { + output[i] = 1; + continue; + } + + // position in the array of sorted global ranks and row number + std::vector> rank_row; + for (int64_t j = begin; j < end; ++j) { + rank_row.push_back(std::make_pair(inverse_permutation[j], j)); + } + bool one_more_group = false; + if (i >= end) { + rank_row.push_back(std::make_pair(inverse_permutation[i], i)); + if (global_ranks_sorted[inverse_permutation[i]] > + global_ranks_sorted[inverse_permutation[end - 1]]) { + one_more_group = true; + } + } + + std::sort(rank_row.begin(), rank_row.end()); + + int64_t num_words = bit_util::CeilDiv(end - begin + 1, 64); + std::vector frame_bitvec(num_words); + memset(frame_bitvec.data(), 0, num_words * sizeof(uint64_t)); + if (i < begin) { + output[i] = 1; + continue; + } + for (int64_t j = 0; j < end - begin + (one_more_group ? 1 : 0); ++j) { + if (j == 0 || global_ranks_sorted[rank_row[j - 1].first] != + global_ranks_sorted[rank_row[j].first]) { + bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), j); + } + } + std::vector frame_output(end - begin + 1); + Global(rank_type, end - begin + (one_more_group ? 1 : 0), frame_bitvec.data(), + frame_output.data()); + for (int64_t j = 0; j < end - begin + (one_more_group ? 
1 : 0); ++j) { + if (rank_row[j].second == i) { + output[i] = frame_output[j]; + break; + } + } + } +} + +void WindowRankTest::TestRank(RankType rank_type, bool separate_ranking_attribute, + bool use_frames, bool use_progressive_frames) { + Random64BitCopy rand; + MemoryPool* pool = default_memory_pool(); + util::TempVectorStack temp_vector_stack; + Status status = temp_vector_stack.Init(pool, 128 * util::MiniBatch::kMiniBatchLength); + ARROW_DCHECK(status.ok()); + int64_t hardware_flags = 0LL; + + constexpr int num_tests = 100; + const int num_tests_to_skip = 0; + for (int test = 0; test < num_tests; ++test) { + // Generate random values + // + constexpr int64_t max_rows = 1100; + int64_t num_rows = rand.from_range(static_cast(1LL), max_rows); + std::vector vals(num_rows); + constexpr int64_t max_val = 65535; + int tie_probability = rand.from_range(0, 256); + for (int64_t i = 0; i < num_rows; ++i) { + bool tie = rand.from_range(0, 255) < tie_probability; + if (tie && i > 0) { + vals[i] = vals[rand.from_range(static_cast(0LL), i - 1)]; + } else { + vals[i] = rand.from_range(static_cast(0LL), max_val); + } + } + + // Generate random frames + // + std::vector begins; + std::vector ends; + GenerateTestFrames(rand, num_rows, begins, ends, + /*progressive=*/use_progressive_frames, + /*expansive=*/false); + + if (test < num_tests_to_skip) { + continue; + } + + // Sort values and output permutation and bit vector of ties + // + int64_t num_bit_words = bit_util::CeilDiv(num_rows, 64); + std::vector ties_bitvec(num_bit_words); + std::vector ties_popcounts(num_bit_words); + std::vector permutation(num_rows); + { + std::vector> val_row_pairs(num_rows); + for (int64_t i = 0; i < num_rows; ++i) { + val_row_pairs[i] = std::make_pair(vals[i], i); + } + std::sort(val_row_pairs.begin(), val_row_pairs.end()); + for (int64_t i = 0; i < num_rows; ++i) { + permutation[i] = val_row_pairs[i].second; + } + memset(ties_bitvec.data(), 0, num_bit_words * sizeof(uint64_t)); + for (int64_t i = 
0; i < num_rows; ++i) { + bool is_first_in_group = + (i == 0 || val_row_pairs[i - 1].first != val_row_pairs[i].first); + if (is_first_in_group) { + bit_util::SetBit(reinterpret_cast(ties_bitvec.data()), i); + } + } + BitVectorNavigator::GenPopCounts(num_rows, ties_bitvec.data(), + ties_popcounts.data()); + } + + // Generate global ranks for the case when window frames use different + // row order + // + std::vector global_ranks(num_rows); + WindowRankBasic::Global(rank_type, num_rows, ties_bitvec.data(), global_ranks.data()); + + printf("num_rows %d ", static_cast(num_rows)); + + std::vector output[2]; + output[0].resize(num_rows); + output[1].resize(num_rows); + + int64_t num_repeats; +#ifndef NDEBUG + num_repeats = 1; +#else + num_repeats = std::max(1LL, 1024 * 1024LL / num_rows); +#endif + printf("num_repeats %d ", static_cast(num_repeats)); + + // int64_t start = __rdtsc(); + for (int repeat = 0; repeat < num_repeats; ++repeat) { + if (!use_frames) { + WindowRankBasic::Global(rank_type, num_rows, ties_bitvec.data(), + output[0].data()); + } else if (!separate_ranking_attribute) { + WindowRankBasic::WithinFrame(rank_type, num_rows, ties_bitvec.data(), + begins.data(), ends.data(), output[0].data()); + } else { + WindowRankBasic::SeparateAttribute(rank_type, num_rows, begins.data(), + ends.data(), global_ranks.data(), + permutation.data(), output[0].data()); + } + } + // int64_t end = __rdtsc(); + // printf("cpr basic %.1f ", + // static_cast(end - start) / static_cast(num_rows * + // num_repeats)); + // start = __rdtsc(); + for (int repeat = 0; repeat < num_repeats; ++repeat) { + if (!use_frames) { + WindowRank::Global(rank_type, num_rows, ties_bitvec.data(), ties_popcounts.data(), + output[1].data(), hardware_flags, &temp_vector_stack); + } else if (!separate_ranking_attribute) { + WindowRank::WithinFrame(rank_type, num_rows, ties_bitvec.data(), + ties_popcounts.data(), begins.data(), ends.data(), + output[1].data(), hardware_flags, &temp_vector_stack); + } 
else { + WindowRank::OnSeparateAttribute(rank_type, num_rows, global_ranks.data(), + permutation.data(), use_progressive_frames, + begins.data(), ends.data(), output[1].data(), + hardware_flags, &temp_vector_stack); + } + } + // end = __rdtsc(); + // printf("cpr normal %.1f ", + // static_cast(end - start) / static_cast(num_rows * + // num_repeats)); + + bool ok = true; + for (int64_t i = 0; i < num_rows; ++i) { + } + printf("%s\n", ok ? "correct" : "wrong"); + } +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/window_rank.h b/cpp/src/arrow/compute/exec/window_functions/window_rank.h new file mode 100644 index 00000000000..71eda1f995d --- /dev/null +++ b/cpp/src/arrow/compute/exec/window_functions/window_rank.h @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include // for std::iota +#include "arrow/compute/exec/util.h" +#include "arrow/compute/exec/window_functions/bit_vector_navigator.h" +#include "arrow/compute/exec/window_functions/merge_tree.h" +#include "arrow/compute/exec/window_functions/range_tree.h" +#include "arrow/compute/exec/window_functions/splay_tree.h" +#include "arrow/compute/exec/window_functions/window_frame.h" + +namespace arrow { +namespace compute { + +// TODO: Current row does not have to be inside its frame. +// Make sure that ranking functions behave well in that case. +// + +// TODO: Scale ranks to achieve CUME_DIST and NTILE values. + +enum class RankType : int { + ROW_NUMBER = 0, + RANK_TIES_LOW = 1, + RANK_TIES_HIGH = 2, + DENSE_RANK = 3 +}; + +class WindowRank { + public: + static void Global(RankType rank_type, int64_t num_rows, const uint64_t* ties_bitvec, + const uint64_t* ties_popcounts, int64_t* output, + int64_t hardware_flags, util::TempVectorStack* temp_vector_stack); + + static void WithinFrame(RankType rank_type, int64_t num_rows, + const uint64_t* ties_bitvec, const uint64_t* ties_popcounts, + const int64_t* frame_begins, const int64_t* frame_ends, + int64_t* output, int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack); + + static void OnSeparateAttribute(RankType rank_type, int64_t num_rows, + const int64_t* global_ranks_sorted, + const int64_t* permutation, bool progressive_frames, + const int64_t* frame_begins, const int64_t* frame_ends, + int64_t* output, int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack); + + private: + static void GlobalRank(bool ties_low, int64_t num_rows, const uint64_t* bitvec, + const uint64_t* popcounts, int64_t* output, + int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack); + + static void GlobalDenseRank(int64_t num_rows, const uint64_t* bitvec, + const uint64_t* popcounts, int64_t* output); + + static void GlobalRowNumber(int64_t num_rows, int64_t* output); + + static 
void RankWithinFrame(bool ties_low, int64_t num_rows, const uint64_t* bitvec, + const uint64_t* popcounts, const int64_t* frame_begins, + const int64_t* frame_ends, int64_t* output, + int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack); + + static void DenseRankWithinFrame(int64_t num_rows, const uint64_t* bitvec, + const uint64_t* popcounts, const int64_t* frame_begins, + const int64_t* frame_ends, int64_t* output); + + static void RowNumberWithinFrame(int64_t num_rows, const int64_t* frame_begins, + const int64_t* frame_ends, int64_t* output); + + static void SeparateAttributeRank( + bool ties_low, + /* number of rows and number of frames */ + int64_t num_rows, const int64_t* begins, const int64_t* ends, + /* Sorted (in ascending order) ranks (with respect to ranking attribute) + for all rows */ + /* null can be passed if all ranks are distinct (in which case the + sorted array would just contain sequence of integers from 1 to num_rows). + Supplying null changes the semantics from rank to row number. */ + const int64_t* ranks_sorted, + /* Permutation of row numbers that results in sortedness on ranking + attribute */ + const int64_t* permutation, int64_t* output, int64_t hardware_flags, + util::TempVectorStack* temp_vector_stack); + + static void SeparateAttributeDenseRank( + int64_t num_rows, const int64_t* begins, const int64_t* ends, + /* The following two arrays must be the result of sorting rows on + (global dense rank, row number within window) pairs. Within a group of + peers with that same dense ranks rows must come in the order in which + they appeared in the window. This could be accomplished by a stable + sort of window on the dense rank value. 
+ */ + const int64_t* global_dense_ranks_sorted, const int64_t* permutation, + int64_t* output, int64_t hardware_flags, util::TempVectorStack* temp_vector_stack); + + static void ProgressiveSeparateAttributeRank(bool dense_rank, bool ties_low, + int64_t num_rows, const int64_t* begins, + const int64_t* ends, + const int64_t* global_ranks, + int64_t* output); + + template + static void ProgressiveSeparateAttributeRankImp(bool ties_low, int64_t num_rows, + const int64_t* begins, + const int64_t* ends, + const int64_t* global_ranks, + int64_t* output); +}; + +class WindowRankBasic { + public: + static void Global(RankType rank_type, int64_t num_rows, const uint64_t* bitvec, + int64_t* output); + + static void WithinFrame(RankType rank_type, int64_t num_rows, const uint64_t* bitvec, + const int64_t* frame_begins, const int64_t* frame_ends, + int64_t* output); + + static void SeparateAttribute(RankType rank_type, int64_t num_rows, + const int64_t* begins, const int64_t* ends, + const int64_t* global_ranks_sorted, + const int64_t* permutation, int64_t* output); +}; + +class WindowRankTest { + public: + static void TestRank(RankType rank_type, bool separate_ranking_attribute, + bool use_frames, bool use_progressive_frames); +}; + +} // namespace compute +} // namespace arrow From faf10fd1f9bd7592db3bd5fa0feaf3a9b31df222 Mon Sep 17 00:00:00 2001 From: michalursa Date: Mon, 26 Sep 2022 00:43:17 -0700 Subject: [PATCH 2/3] Fixing bugs in window rank functions --- .../window_functions/bit_vector_navigator.cc | 3 +- .../exec/window_functions/merge_tree.h | 2 +- .../exec/window_functions/splay_tree.cc | 176 +++++++++++------- .../exec/window_functions/splay_tree.h | 38 ++-- .../exec/window_functions/window_rank.cc | 134 ++++++++----- .../exec/window_functions/window_rank.h | 11 +- 6 files changed, 222 insertions(+), 142 deletions(-) diff --git a/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc 
b/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc index 104d952d6fc..038a3f921de 100644 --- a/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc +++ b/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc @@ -64,8 +64,7 @@ void BitVectorNavigator::SelectsForRangeOfRanks( int64_t first_select = BitVectorNavigator::Select(rank_begin, num_bits, bitvec, popcounts); - int64_t last_select = - BitVectorNavigator::Select(rank_begin, num_bits, bitvec, popcounts); + int64_t last_select = BitVectorNavigator::Select(rank_end, num_bits, bitvec, popcounts); for (int64_t minibatch_begin = first_select; minibatch_begin < last_select + 1; minibatch_begin += minibatch_length_max) { diff --git a/cpp/src/arrow/compute/exec/window_functions/merge_tree.h b/cpp/src/arrow/compute/exec/window_functions/merge_tree.h index cb5ea0761a3..e6c7b48c790 100644 --- a/cpp/src/arrow/compute/exec/window_functions/merge_tree.h +++ b/cpp/src/arrow/compute/exec/window_functions/merge_tree.h @@ -215,7 +215,7 @@ class MergeTree { Cascade_End(level + 1, y_end, &y_ends_new[0], &y_ends_new[1]); } if (y_ends_2nd[iquery] != kEmptyRangeBoundary) { - Cascade_End(level + 1, y_end, &y_ends_new[2], &y_ends_new[3]); + Cascade_End(level + 1, y_end_2nd, &y_ends_new[2], &y_ends_new[3]); } } diff --git a/cpp/src/arrow/compute/exec/window_functions/splay_tree.cc b/cpp/src/arrow/compute/exec/window_functions/splay_tree.cc index 97fcfd8b62e..1968b13f3f4 100644 --- a/cpp/src/arrow/compute/exec/window_functions/splay_tree.cc +++ b/cpp/src/arrow/compute/exec/window_functions/splay_tree.cc @@ -30,11 +30,11 @@ void SplayTree::Insert(int64_t value) { Find(value, kCountStar, &parent_id, &parent_side, &node_id, &rank); if (node_id != kNilId) { + ++nodes_[node_id].value_count; + ++nodes_[node_id].subtree_count[kCountStar]; while (parent_id != kNilId) { - NodeType& node = nodes_[node_id]; - ++node.value_count; + NodeType& node = nodes_[parent_id]; ++node.subtree_count[kCountStar]; - 
node_id = parent_id; parent_id = node.parent_id; } @@ -56,7 +56,7 @@ void SplayTree::Insert(int64_t value) { root_id_ = new_node_id; } else { nodes_[parent_id].child_id[parent_side] = new_node_id; - Splay(node_id); + Splay(new_node_id); } nodes_[root_id_].value_count = 1; for (int i = 0; i < 2; ++i) { @@ -88,6 +88,7 @@ void SplayTree::Remove(int64_t value) { nodes_[x].subtree_count[kCountStar] -= 1; } --node->value_count; + --node->subtree_count[kCountStar]; if (node->value_count > 0) { #ifndef NDEBUG @@ -100,6 +101,7 @@ void SplayTree::Remove(int64_t value) { for (index_t x = parent_id; x != kNilId; x = nodes_[x].parent_id) { nodes_[x].subtree_count[kCountDistinctValue] -= 1; } + --node->subtree_count[kCountDistinctValue]; if (node->child_id[0] != kNilId && node->child_id[1] != kNilId) { index_t prev_node_id = node->child_id[0]; @@ -112,15 +114,27 @@ void SplayTree::Remove(int64_t value) { nodes_[x].subtree_count[kCountStar] -= prev_node.value_count; nodes_[x].subtree_count[kCountDistinctValue] -= 1; } + index_t prev_node_parent_id = nodes_[prev_node_id].parent_id; + if (nodes_[prev_node_parent_id].child_id[0] == prev_node_id) { + nodes_[prev_node_parent_id].child_id[0] = nodes_[prev_node_id].child_id[0]; + } else { + nodes_[prev_node_parent_id].child_id[1] = nodes_[prev_node_id].child_id[0]; + } + if (nodes_[prev_node_id].child_id[0] != kNilId) { + nodes_[nodes_[prev_node_id].child_id[0]].parent_id = prev_node_parent_id; + } + nodes_[prev_node_id].parent_id = kNilId; node->value = prev_node.value; node->value_count = prev_node.value_count; - node->subtree_count[kCountStar] -= 1; - node->subtree_count[kCountDistinctValue] -= 1; - node_id = prev_node_id; - parent_id = prev_node.parent_id; - node = &nodes_[node_id]; + DeallocateNode(prev_node_id); + +#ifndef NDEBUG + ValidateTree(); +#endif + + return; } for (int side = 0; side < 2; ++side) { @@ -161,7 +175,7 @@ int64_t SplayTree::Rank(bool ties_low, int64_t value) { int parent_side; index_t node_id; Find(value, 
kCountStar, &parent_id, &parent_side, &node_id, &rank); - if (ties_low || node_id == kNilId) { + if (ties_low) { return rank + 1; } return rank + nodes_[node_id].value_count; @@ -215,11 +229,11 @@ void SplayTree::SwitchParent(index_t old_parent_id, int old_child_side, } } -// parent node | -// / \ / \ | -// node y --> x parent | -// / \ / \ | -// x mid mid y | +// parent node | +// / \ / \ | +// node y --> x parent | +// / \ / \ | +// x mid mid y | void SplayTree::Zig(index_t node_id, index_t parent_id, int parent_side) { NodeType& node = nodes_[node_id]; NodeType& parent = nodes_[parent_id]; @@ -232,13 +246,13 @@ void SplayTree::Zig(index_t node_id, index_t parent_id, int parent_side) { // SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); - // At this point we have: | - // | - // nil nil | - // | | | - // node + parent | - // / \ / \ | - // x nil mid y | + // At this point we have: + // + // nil nil | + // | | | + // node + parent | + // / \ / \ | + // x nil mid y | // // Connect parent to node @@ -251,36 +265,42 @@ void SplayTree::Zig(index_t node_id, index_t parent_id, int parent_side) { root_id_ = node_id; } -// grandparent node | -// / \ / \ | -// parent y x parent | -// / \ --> / \ | -// node mid1 mid0 grandparent | -// / \ / \ | -// x mid0 mid1 y | +// grandparent node | +// / \ / \ | +// parent y x parent | +// / \ --> / \ | +// node mid1 mid0 grandparent | +// / \ / \ | +// x mid0 mid1 y | void SplayTree::ZigZig(index_t node_id, index_t parent_id, index_t grandparent_id, int parent_side) { NodeType& node = nodes_[node_id]; NodeType& parent = nodes_[parent_id]; NodeType& grandparent = nodes_[grandparent_id]; - // Rearrange tree nodes + // Rearrange tree nodes. + // The order of the calls below is important. 
// - SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); SwitchParent(parent_id, 1 - parent_side, grandparent_id, parent_side); + SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); - // At this point we have: | - // | - // nil nil z | - // | | | | - // node + parent + grandparent | - // / \ / \ / \ | - // x nil mid0 nil mid1 y | + // At this point we have: // + // nil nil z | + // | | | | + // node + parent + grandparent | + // / \ / \ / \ | + // x nil mid0 nil mid1 y | + // + + node.parent_id = grandparent.parent_id; + if (node.parent_id != kNilId) { + int side = (nodes_[node.parent_id].child_id[0] == grandparent_id) ? 0 : 1; + nodes_[node.parent_id].child_id[side] = node_id; + } // Connect grandparent to parent // - node.parent_id = grandparent.parent_id; parent.child_id[1 - parent_side] = grandparent_id; grandparent.parent_id = parent_id; for (int i = 0; i < 2; ++i) { @@ -299,36 +319,48 @@ void SplayTree::ZigZig(index_t node_id, index_t parent_id, index_t grandparent_i } } -// grandparent node | -// / \ / \ | -// parent y parent grandparent | -// / \ --> /\ / \ | -// x node x mid0 mid1 y | -// / \ | -// mid0 mid1 | +// grandparent node | +// / \ / \ | +// parent y parent grandparent | +// / \ --> /\ / \ | +// x node x mid0 mid1 y | +// / \ | +// mid0 mid1 | void SplayTree::ZigZag(index_t node_id, index_t parent_id, index_t grandparent_id, int parent_side, int grandparent_side) { NodeType& node = nodes_[node_id]; NodeType& parent = nodes_[parent_id]; NodeType& grandparent = nodes_[grandparent_id]; - // Rearrange tree nodes + // Rearrange tree nodes. + // The order of the calls below is important. 
// - SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); SwitchParent(node_id, parent_side, grandparent_id, 1 - parent_side); + if (grandparent.child_id[1 - parent_side] != kNilId) { + for (int i = 0; i < 2; ++i) { + parent.subtree_count[i] -= + nodes_[grandparent.child_id[1 - parent_side]].subtree_count[i]; + } + } + SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); + + // At this point we have: + // + // nil nil z | + // | | | | + // node + parent + grandparent | + // / \ / \ / \ | + // nil nil x mid0 mid1 y | + // - // At this point we have: | - // | - // nil nil z | - // | | | | - // node + parent + grandparent | - // / \ / \ / \ | - // nil nil x mid0 mid1 y | - // | + node.parent_id = grandparent.parent_id; + if (node.parent_id != kNilId) { + int side = (nodes_[node.parent_id].child_id[0] == grandparent_id) ? 0 : 1; + nodes_[node.parent_id].child_id[side] = node_id; + } // Connect parent and grandparent to node // - node.parent_id = grandparent.parent_id; node.child_id[1 - parent_side] = parent_id; node.child_id[parent_side] = grandparent_id; parent.parent_id = node_id; @@ -362,6 +394,9 @@ void SplayTree::Splay(index_t node_id) { } else { ZigZag(node_id, parent_id, grandparent_id, parent_side, grandparent_side); } +#ifndef NDEBUG + ValidateTree(); +#endif } } @@ -399,18 +434,28 @@ void SplayTree::Find(int64_t value, int counter_id, index_t* parent_id, int* par void SplayTree::ValidateVisit(index_t node_id, index_t* count, index_t* count_distinct) { ARROW_DCHECK(node_id != kNilId); + ARROW_DCHECK(nodes_[node_id].parent_id == kNilId || + nodes_[nodes_[node_id].parent_id].child_id[0] == node_id || + nodes_[nodes_[node_id].parent_id].child_id[1] == node_id); *count = nodes_[node_id].value_count; - *count_distinct = 1; + *count_distinct = nodes_[node_id].value_count > 0 ? 
1 : 0; for (int side = 0; side < 2; ++side) { if (nodes_[node_id].child_id[side] != kNilId) { index_t count_child, count_distinct_child; + ARROW_DCHECK(nodes_[nodes_[node_id].child_id[side]].parent_id == node_id); ValidateVisit(nodes_[node_id].child_id[side], &count_child, &count_distinct_child); *count += count_child; *count_distinct += count_distinct_child; } } - ARROW_DCHECK(*count == nodes_[node_id].subtree_count[kCountStar]); - ARROW_DCHECK(*count_distinct == nodes_[node_id].subtree_count[kCountDistinctValue]); + bool count_correct = (*count == nodes_[node_id].subtree_count[kCountStar]); + bool count_distinct_correct = + (*count_distinct == nodes_[node_id].subtree_count[kCountDistinctValue]); + if (!count_correct || !count_distinct_correct) { + Print(); + } + ARROW_DCHECK(count_correct); + ARROW_DCHECK(count_distinct_correct); } void SplayTree::ValidateTree() { @@ -419,7 +464,8 @@ void SplayTree::ValidateTree() { if (root_id_ != kNilId) { ValidateVisit(root_id_, &count, &count_distinct); } - ARROW_DCHECK(nodes_.size() == empty_slots_.size() + count + /*extra one for kNilId*/ 1); + ARROW_DCHECK(nodes_.size() <= empty_slots_.size() + count_distinct + + /*extra one for kNilId*/ 1 + 1); } template @@ -447,9 +493,10 @@ void SplayTree::Print_BoxWH(index_t node_id, std::map& boxes) } PrintBox box; + box.x = box.y = 0; int label_size = static_cast(Print_Label(node_id).length()); - if (!has_child[0] && !has_child[1] == 0) { + if (!has_child[0] && !has_child[1]) { box.root_x = 0; box.w = label_size; box.h = 1; @@ -479,6 +526,7 @@ void SplayTree::Print_BoxWH(index_t node_id, std::map& boxes) void SplayTree::Print_BoxXY(int x, int y, index_t node_id, std::map& boxes) { PrintBox& box = boxes.find(node_id)->second; + box.root_x += x; box.x += x; box.y += y; bool has_child[2]; @@ -507,8 +555,8 @@ void SplayTree::Print_PutChar(std::vector>& canvas, int x, int void SplayTree::Print_PutString(std::vector>& canvas, int x, int y, std::string str) { - for (size_t i = 0; i < 
str.length(); ++i) { - Print_PutChar(canvas, x, y, str[i]); + for (int i = 0; i < static_cast(str.length()); ++i) { + Print_PutChar(canvas, x + i, y, str[i]); } } diff --git a/cpp/src/arrow/compute/exec/window_functions/splay_tree.h b/cpp/src/arrow/compute/exec/window_functions/splay_tree.h index 604ca4cf91a..12b982f9f6f 100644 --- a/cpp/src/arrow/compute/exec/window_functions/splay_tree.h +++ b/cpp/src/arrow/compute/exec/window_functions/splay_tree.h @@ -70,30 +70,30 @@ class SplayTree { void SwitchParent(index_t old_parent_id, int old_child_side, index_t new_parent_id, int new_child_side); - // parent node | - // / \ / \ | - // node y --> x parent | - // / \ / \ | - // x mid mid y | + // parent node | + // / \ / \ | + // node y --> x parent | + // / \ / \ | + // x mid mid y | void Zig(index_t node_id, index_t parent_id, int parent_side); - // grandparent node | - // / \ / \ | - // parent y x parent | - // / \ --> / \ | - // node mid1 mid0 grandparent | - // / \ / \ | - // x mid0 mid1 y | + // grandparent node | + // / \ / \ | + // parent y x parent | + // / \ --> / \ | + // node mid1 mid0 grandparent | + // / \ / \ | + // x mid0 mid1 y | void ZigZig(index_t node_id, index_t parent_id, index_t grandparent_id, int parent_side); - // grandparent node | - // / \ / \ | - // parent y parent grandparent | - // / \ --> /\ / \ | - // x node x mid0 mid1 y | - // / \ | - // mid0 mid1 | + // grandparent node | + // / \ / \ | + // parent y parent grandparent | + // / \ --> /\ / \ | + // x node x mid0 mid1 y | + // / \ | + // mid0 mid1 | void ZigZag(index_t node_id, index_t parent_id, index_t grandparent_id, int parent_side, int grandparent_side); diff --git a/cpp/src/arrow/compute/exec/window_functions/window_rank.cc b/cpp/src/arrow/compute/exec/window_functions/window_rank.cc index ebd6f8123fc..cf30e06399a 100644 --- a/cpp/src/arrow/compute/exec/window_functions/window_rank.cc +++ b/cpp/src/arrow/compute/exec/window_functions/window_rank.cc @@ -150,7 +150,7 @@ void 
WindowRank::OnSeparateAttribute(RankType rank_type, int64_t num_rows, } else { ProgressiveSeparateAttributeRank( /*dense_rank=*/false, rank_type == RankType::RANK_TIES_LOW, num_rows, - frame_begins, frame_ends, global_ranks_sorted, output); + frame_begins, frame_ends, global_ranks_sorted, permutation, output); } break; case RankType::DENSE_RANK: @@ -161,7 +161,7 @@ void WindowRank::OnSeparateAttribute(RankType rank_type, int64_t num_rows, } else { ProgressiveSeparateAttributeRank( /*dense_rank=*/true, false, num_rows, frame_begins, frame_ends, - global_ranks_sorted, output); + global_ranks_sorted, permutation, output); } break; } @@ -246,16 +246,26 @@ void WindowRank::RankWithinFrame(bool ties_low, int64_t num_rows, const uint64_t output[i] = 1; } else if (i >= frame_ends[i]) { if (tie_with_last) { - output[i] -= frame_begins[i]; + if (ties_low) { + output[i] -= frame_begins[i]; + } else { + output[i] = frame_ends[i] - frame_begins[i] + 1; + } } else { output[i] = frame_ends[i] - frame_begins[i] + 1; } } else { - output[i] -= frame_begins[i]; + if (tie_with_last && !ties_low) { + output[i] = frame_ends[i] - frame_begins[i]; + } else { + output[i] -= frame_begins[i]; + } } } else { if (tie_with_last) { - output[i] = 1; + output[i] = ties_low ? 1 + : frame_ends[i] - frame_begins[i] + + ((i < frame_begins[i] || i >= frame_ends[i]) ? 1 : 0); } else { // Bit vector rank of current row is the same as the beginning of // the frame but different than for the last row of the frame, which @@ -352,8 +362,10 @@ void WindowRank::SeparateAttributeRank( // Ties low means outputting the number of rows in window frame with rank // lower than current row plus 1. Initialize output counter accordingly. // - int64_t delta = ties_low ? 1 : 0; - std::fill_n(output, num_rows, delta); + for (int64_t i = 0; i < num_rows; ++i) { + bool outside_of_frame = i < begins[i] || i >= ends[i]; + output[i] = (ties_low || outside_of_frame) ? 
1 : 0; + } // For each row compute the number of rows with the lower rank (lower or // equal in case of ties high). @@ -362,8 +374,19 @@ void WindowRank::SeparateAttributeRank( // merge tree. // std::vector y_ends(num_rows); + int64_t first_in_group; + int64_t group_size; for (int64_t i = 0; i < num_rows; ++i) { - y_ends[permutation[i]] = (ranks_sorted ? ranks_sorted[i] : (i + 1)) + delta; + if (i == 0 || ranks_sorted[i] != ranks_sorted[i - 1]) { + first_in_group = i; + group_size = 1; + for (int64_t j = i + 1; j < num_rows; ++j) { + if (ranks_sorted[j] == ranks_sorted[i]) { + ++group_size; + } + } + } + y_ends[permutation[i]] = ties_low ? first_in_group : first_in_group + group_size; } BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, num_rows) @@ -493,27 +516,31 @@ void WindowRank::SeparateAttributeDenseRank( void WindowRank::ProgressiveSeparateAttributeRank(bool dense_rank, bool ties_low, int64_t num_rows, const int64_t* begins, const int64_t* ends, - const int64_t* global_ranks, + const int64_t* global_ranks_sorted, + const int64_t* permutation, int64_t* output) { if (dense_rank) { - ProgressiveSeparateAttributeRankImp(false, num_rows, begins, ends, global_ranks, - output); + ProgressiveSeparateAttributeRankImp(false, num_rows, begins, ends, + global_ranks_sorted, permutation, output); } else { ProgressiveSeparateAttributeRankImp(ties_low, num_rows, begins, ends, - global_ranks, output); + global_ranks_sorted, permutation, output); } } template -void WindowRank::ProgressiveSeparateAttributeRankImp(bool ties_low, int64_t num_rows, - const int64_t* begins, - const int64_t* ends, - const int64_t* global_ranks, - int64_t* output) { +void WindowRank::ProgressiveSeparateAttributeRankImp( + bool ties_low, int64_t num_rows, const int64_t* begins, const int64_t* ends, + const int64_t* global_ranks_sorted, const int64_t* permutation, int64_t* output) { SplayTree tree; int64_t begin = begins[0]; int64_t end = begin; + std::vector global_ranks(num_rows); + for (int64_t i = 
0; i < num_rows; ++i) { + global_ranks[permutation[i]] = global_ranks_sorted[i]; + } + for (int64_t iframe = 0; iframe < num_rows; ++iframe) { int64_t frame_begin = begins[iframe]; int64_t frame_end = ends[iframe]; @@ -536,6 +563,9 @@ void WindowRank::ProgressiveSeparateAttributeRankImp(bool ties_low, int64_t num_ output[iframe] = tree.DenseRank(global_ranks[iframe]); } else { output[iframe] = tree.Rank(ties_low, global_ranks[iframe]); + if (!ties_low && (iframe < frame_begin || iframe >= frame_end)) { + ++output[iframe]; + } } } } @@ -548,10 +578,7 @@ void WindowRankBasic::Global(RankType rank_type, int64_t num_rows, const uint64_ for (int64_t i = 0; i < num_rows; ++i) { if (i == 0) { current_group_id = 0; - first_in_group = 0; - num_in_group = 1; - for (num_in_group = 1; first_in_group + num_in_group < num_rows; ++num_in_group) { - } + first_in_group = i; } else { if (bit_util::GetBit(reinterpret_cast(bitvec), i)) { ++current_group_id; @@ -559,6 +586,7 @@ void WindowRankBasic::Global(RankType rank_type, int64_t num_rows, const uint64_ } } if (first_in_group == i) { + num_in_group = 1; while (first_in_group + num_in_group < num_rows && !bit_util::GetBit(reinterpret_cast(bitvec), first_in_group + num_in_group)) { @@ -596,29 +624,40 @@ void WindowRankBasic::WithinFrame(RankType rank_type, int64_t num_rows, int64_t num_words = bit_util::CeilDiv(end - begin + 1, 64); std::vector frame_bitvec(num_words); memset(frame_bitvec.data(), 0, num_words * sizeof(uint64_t)); + bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), 0); + + int64_t start_offset = 0; if (i < begin) { - output[i] = 1; - continue; + start_offset = 1; + for (int64_t j = i + 1; j <= begin; ++j) { + if (bit_util::GetBit(reinterpret_cast(bitvec), j)) { + bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), start_offset); + } + } } - for (int64_t j = 0; j < end - begin; ++j) { + for (int64_t j = begin; j < end; ++j) { if (bit_util::GetBit(reinterpret_cast(bitvec), j)) { - 
bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), j); + bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), + j - begin + start_offset); } } - bool one_more_group = false; if (i >= end) { for (int64_t j = end; j <= i; ++j) { if (bit_util::GetBit(reinterpret_cast(bitvec), j)) { - one_more_group = true; - bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), end - begin); + bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), + end - begin + start_offset); break; } } } std::vector frame_output(end - begin + 1); - Global(rank_type, end - begin + (one_more_group ? 1 : 0), frame_bitvec.data(), - frame_output.data()); - output[i] = frame_output[std::min(end, i) - begin]; + Global(rank_type, end - begin + ((i < begin || i >= end) ? 1 : 0), + frame_bitvec.data(), frame_output.data()); + if (i < begin) { + output[i] = frame_output[0]; + } else { + output[i] = frame_output[std::min(end, i) - begin]; + } } } @@ -648,34 +687,25 @@ void WindowRankBasic::SeparateAttribute(RankType rank_type, int64_t num_rows, for (int64_t j = begin; j < end; ++j) { rank_row.push_back(std::make_pair(inverse_permutation[j], j)); } - bool one_more_group = false; - if (i >= end) { + if (i >= end || i < begin) { rank_row.push_back(std::make_pair(inverse_permutation[i], i)); - if (global_ranks_sorted[inverse_permutation[i]] > - global_ranks_sorted[inverse_permutation[end - 1]]) { - one_more_group = true; - } } + int64_t rank_row_length = static_cast(rank_row.size()); std::sort(rank_row.begin(), rank_row.end()); int64_t num_words = bit_util::CeilDiv(end - begin + 1, 64); std::vector frame_bitvec(num_words); memset(frame_bitvec.data(), 0, num_words * sizeof(uint64_t)); - if (i < begin) { - output[i] = 1; - continue; - } - for (int64_t j = 0; j < end - begin + (one_more_group ? 
1 : 0); ++j) { + for (int64_t j = 0; j < rank_row_length; ++j) { if (j == 0 || global_ranks_sorted[rank_row[j - 1].first] != global_ranks_sorted[rank_row[j].first]) { bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), j); } } - std::vector frame_output(end - begin + 1); - Global(rank_type, end - begin + (one_more_group ? 1 : 0), frame_bitvec.data(), - frame_output.data()); - for (int64_t j = 0; j < end - begin + (one_more_group ? 1 : 0); ++j) { + std::vector frame_output(rank_row_length); + Global(rank_type, rank_row_length, frame_bitvec.data(), frame_output.data()); + for (int64_t j = 0; j < rank_row_length; ++j) { if (rank_row[j].second == i) { output[i] = frame_output[j]; break; @@ -787,8 +817,8 @@ void WindowRankTest::TestRank(RankType rank_type, bool separate_ranking_attribut } // int64_t end = __rdtsc(); // printf("cpr basic %.1f ", - // static_cast(end - start) / static_cast(num_rows * - // num_repeats)); + // static_cast(end - start) / static_cast(num_rows * + // num_repeats)); // start = __rdtsc(); for (int repeat = 0; repeat < num_repeats; ++repeat) { if (!use_frames) { @@ -807,11 +837,15 @@ void WindowRankTest::TestRank(RankType rank_type, bool separate_ranking_attribut } // end = __rdtsc(); // printf("cpr normal %.1f ", - // static_cast(end - start) / static_cast(num_rows * - // num_repeats)); + // static_cast(end - start) / static_cast(num_rows * + // num_repeats)); bool ok = true; for (int64_t i = 0; i < num_rows; ++i) { + if (output[0][i] != output[1][i]) { + ARROW_DCHECK(false); + ok = false; + } } printf("%s\n", ok ? 
"correct" : "wrong"); } diff --git a/cpp/src/arrow/compute/exec/window_functions/window_rank.h b/cpp/src/arrow/compute/exec/window_functions/window_rank.h index 71eda1f995d..e6da5e17b2d 100644 --- a/cpp/src/arrow/compute/exec/window_functions/window_rank.h +++ b/cpp/src/arrow/compute/exec/window_functions/window_rank.h @@ -114,15 +114,14 @@ class WindowRank { static void ProgressiveSeparateAttributeRank(bool dense_rank, bool ties_low, int64_t num_rows, const int64_t* begins, const int64_t* ends, - const int64_t* global_ranks, + const int64_t* global_ranks_sorted, + const int64_t* permutation, int64_t* output); template - static void ProgressiveSeparateAttributeRankImp(bool ties_low, int64_t num_rows, - const int64_t* begins, - const int64_t* ends, - const int64_t* global_ranks, - int64_t* output); + static void ProgressiveSeparateAttributeRankImp( + bool ties_low, int64_t num_rows, const int64_t* begins, const int64_t* ends, + const int64_t* global_ranks_sorted, const int64_t* permutation, int64_t* output); }; class WindowRankBasic { From 4ce8e8c4f21c0ea0d073398dd297f29444d6edbc Mon Sep 17 00:00:00 2001 From: michalursa Date: Mon, 14 Nov 2022 16:40:33 -0800 Subject: [PATCH 3/3] Window Functions rewrite of helper classes for ranking --- cpp/src/arrow/CMakeLists.txt | 3 - cpp/src/arrow/compute/exec/CMakeLists.txt | 5 + cpp/src/arrow/compute/exec/util.h | 33 + .../window_functions/bit_vector_navigator.cc | 121 -- .../window_functions/bit_vector_navigator.h | 573 ++++++-- .../exec/window_functions/merge_tree.cc | 1204 ++++++++++++++--- .../exec/window_functions/merge_tree.h | 554 ++++---- .../exec/window_functions/range_tree.cc | 227 ---- .../exec/window_functions/range_tree.h | 67 - .../exec/window_functions/splay_tree.cc | 610 --------- .../exec/window_functions/splay_tree.h | 139 -- .../exec/window_functions/window_frame.h | 139 +- .../exec/window_functions/window_rank.cc | 1085 +++++---------- .../exec/window_functions/window_rank.h | 141 +- 
.../exec/window_functions/window_test.cc | 453 +++++++ 15 files changed, 2653 insertions(+), 2701 deletions(-) delete mode 100644 cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc delete mode 100644 cpp/src/arrow/compute/exec/window_functions/range_tree.cc delete mode 100644 cpp/src/arrow/compute/exec/window_functions/range_tree.h delete mode 100644 cpp/src/arrow/compute/exec/window_functions/splay_tree.cc delete mode 100644 cpp/src/arrow/compute/exec/window_functions/splay_tree.h create mode 100644 cpp/src/arrow/compute/exec/window_functions/window_test.cc diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index d6f9c94fc89..3517d0cf041 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -410,10 +410,7 @@ if(ARROW_COMPUTE) compute/exec/tpch_node.cc compute/exec/union_node.cc compute/exec/util.cc - compute/exec/window_functions/bit_vector_navigator.cc compute/exec/window_functions/merge_tree.cc - compute/exec/window_functions/splay_tree.cc - compute/exec/window_functions/range_tree.cc compute/exec/window_functions/window_rank.cc compute/function.cc compute/function_internal.cc diff --git a/cpp/src/arrow/compute/exec/CMakeLists.txt b/cpp/src/arrow/compute/exec/CMakeLists.txt index 4ce73359d0f..d83258a9722 100644 --- a/cpp/src/arrow/compute/exec/CMakeLists.txt +++ b/cpp/src/arrow/compute/exec/CMakeLists.txt @@ -45,6 +45,11 @@ add_arrow_compute_test(util_test SOURCES util_test.cc task_util_test.cc) +add_arrow_compute_test(window_functions_test + PREFIX + "arrow-compute" + SOURCES + window_functions/window_test.cc) add_arrow_benchmark(expression_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/exec/util.h b/cpp/src/arrow/compute/exec/util.h index b249857f441..0c332d21774 100644 --- a/cpp/src/arrow/compute/exec/util.h +++ b/cpp/src/arrow/compute/exec/util.h @@ -424,5 +424,38 @@ Result ModifyExpression(Expression expr, const PreVisit& pre, return post_call(std::move(expr), NULLPTR); } 
+struct ThreadContext { + int64_t thread_index; + util::TempVectorStack* temp_vector_stack; + int64_t hardware_flags; +}; + +struct ParallelForStream { + using TaskCallback = std::function; + + void InsertParallelFor(int64_t num_tasks, TaskCallback task_callback) { + parallel_fors_.push_back(std::make_pair(num_tasks, task_callback)); + } + + void InsertTaskSingle(TaskCallback task_callback) { + parallel_fors_.push_back(std::make_pair(static_cast(1), task_callback)); + } + + // If any of the tasks returns an error status then all the remaining parallel + // fors in the stream will not be executed and the first error status within + // the failing parallel for loop step will be returned. + // + Status RunOnSingleThread(ThreadContext& thread_context) { + for (size_t i = 0; i < parallel_fors_.size(); ++i) { + for (int64_t j = 0; j < parallel_fors_[i].first; ++j) { + ARROW_RETURN_NOT_OK(parallel_fors_[i].second(j, thread_context)); + } + } + return Status::OK(); + } + + std::vector> parallel_fors_; +}; + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc b/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc deleted file mode 100644 index 038a3f921de..00000000000 --- a/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.cc +++ /dev/null @@ -1,121 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/compute/exec/window_functions/bit_vector_navigator.h" -#include -#include "arrow/compute/exec/util.h" - -namespace arrow { -namespace compute { - -void BitVectorNavigator::SelectsForRangeOfRanks( - int64_t rank_begin, int64_t rank_end, int64_t num_bits, const uint64_t* bitvec, - const uint64_t* popcounts, int64_t* outputs, int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) { - ARROW_DCHECK(rank_begin <= rank_end); - if (rank_begin == rank_end) { - return; - } - int64_t popcount_all = PopCount(num_bits, bitvec, popcounts); - if (rank_end <= 0LL) { - for (int64_t i = 0LL; i < rank_end - rank_begin; ++i) { - outputs[i] = -1LL; - } - return; - } - if (rank_begin >= popcount_all) { - for (int64_t i = 0LL; i < rank_end - rank_begin; ++i) { - outputs[i] = num_bits; - } - return; - } - if (rank_begin < 0LL) { - for (int64_t i = 0LL; i < -rank_begin; ++i) { - outputs[i] = -1LL; - } - outputs += -rank_begin; - rank_begin = 0LL; - } - if (rank_end > popcount_all) { - for (int64_t i = popcount_all - rank_begin; i < rank_end - rank_begin; ++i) { - outputs[i] = num_bits; - } - rank_end = popcount_all; - } - - int64_t minibatch_length_max = util::MiniBatch::kMiniBatchLength; - auto indexes = util::TempVectorHolder( - temp_vector_stack, static_cast(minibatch_length_max)); - int num_indexes; - - int64_t first_select = - BitVectorNavigator::Select(rank_begin, num_bits, bitvec, popcounts); - int64_t last_select = BitVectorNavigator::Select(rank_end, num_bits, bitvec, popcounts); - - for (int64_t minibatch_begin = first_select; 
minibatch_begin < last_select + 1; - minibatch_begin += minibatch_length_max) { - int64_t minibatch_end = - std::min(last_select + 1, minibatch_begin + minibatch_length_max); - util::bit_util::bits_to_indexes( - /*bit_to_search=*/1, hardware_flags, - static_cast(minibatch_end - minibatch_begin), - reinterpret_cast(bitvec), &num_indexes, indexes.mutable_data(), - static_cast(minibatch_begin)); - for (int i = 0; i < num_indexes; ++i) { - outputs[i] = minibatch_begin + indexes.mutable_data()[i]; - } - outputs += num_indexes; - } -} - -void BitVectorNavigator::SelectsForRelativeRanksForRangeOfRows( - int64_t batch_begin, int64_t batch_end, int64_t rank_delta, int64_t num_rows, - const uint64_t* ties_bitvec, const uint64_t* ties_popcounts, int64_t* outputs, - int64_t hardware_flags, util::TempVectorStack* temp_vector_stack) { - // Break into mini-batches - int64_t minibatch_length_max = util::MiniBatch::kMiniBatchLength; - auto selects_for_ranks_buf = util::TempVectorHolder( - temp_vector_stack, static_cast(minibatch_length_max)); - auto selects_for_ranks = selects_for_ranks_buf.mutable_data(); - for (int64_t minibatch_begin = batch_begin; minibatch_begin < batch_end; - minibatch_begin += minibatch_length_max) { - int64_t minibatch_end = std::min(batch_end, minibatch_begin + minibatch_length_max); - - // First and last rank that we are interested in - int64_t first_rank = - BitVectorNavigator::RankNext(minibatch_begin, ties_bitvec, ties_popcounts) - 1LL; - int64_t last_rank = - BitVectorNavigator::RankNext(minibatch_end - 1, ties_bitvec, ties_popcounts) - - 1LL; - - // Do select for each rank in the calculated range. 
- // - BitVectorNavigator::SelectsForRangeOfRanks( - first_rank + rank_delta, last_rank + rank_delta + 1, num_rows, ties_bitvec, - ties_popcounts, selects_for_ranks, hardware_flags, temp_vector_stack); - - int irank = 0; - outputs[minibatch_begin - batch_begin] = selects_for_ranks[irank]; - for (int64_t i = minibatch_begin + 1; i < minibatch_end; ++i) { - irank += bit_util::GetBit(reinterpret_cast(ties_bitvec), i) ? 1 : 0; - outputs[minibatch_begin - batch_begin] = selects_for_ranks[irank]; - } - } -} - -} // namespace compute -} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.h b/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.h index ba62e21ea12..f5013e19e65 100644 --- a/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.h +++ b/cpp/src/arrow/compute/exec/window_functions/bit_vector_navigator.h @@ -24,134 +24,489 @@ namespace arrow { namespace compute { -// Bit-vector allocated size must be multiple of 64-bits. -// There is exactly ceil(num_bits / 64) 64-bit population counters. +// Storage for a bit vector to be used with BitVectorNavigator and its variants. // -class BitVectorNavigator { +// Supports weaved bit vectors. 
+// +class BitVectorWithCountsBase { + template + friend class BitVectorNavigatorImp; + + public: + BitVectorWithCountsBase() : num_children_(0), num_bits_per_child_(0) {} + + void Resize(int64_t num_bits_per_child, int64_t num_children = 1) { + ARROW_DCHECK(num_children > 0 && num_bits_per_child > 0); + num_children_ = num_children; + num_bits_per_child_ = num_bits_per_child; + int64_t num_words = + bit_util::CeilDiv(num_bits_per_child, kBitsPerWord) * num_children; + bits_.resize(num_words); + mid_counts_.resize(num_words); + int64_t num_blocks = + bit_util::CeilDiv(num_bits_per_child, kBitsPerBlock) * num_children; + top_counts_.resize(num_blocks); + } + + void ClearBits() { memset(bits_.data(), 0, bits_.size() * sizeof(bits_[0])); } + + // Word is 64 adjacent bits + // + static constexpr int64_t kBitsPerWord = 64; + // Block is 65536 adjacent bits + // (that means that 16-bit counters can be used within the block) + // +#ifndef NDEBUG + static constexpr int kLogBitsPerBlock = 7; +#else + static constexpr int kLogBitsPerBlock = 16; +#endif + static constexpr int64_t kBitsPerBlock = 1LL << kLogBitsPerBlock; + + protected: + int64_t num_children_; + int64_t num_bits_per_child_; + // TODO: Replace vectors with ResizableBuffers. Return error status from + // Resize on out-of-memory. 
+ // + std::vector bits_; + std::vector top_counts_; + std::vector mid_counts_; +}; + +template +class BitVectorNavigatorImp { public: - static uint64_t GenPopCounts(int64_t num_bits, const uint64_t* bits, - uint64_t* pop_counts) { - int64_t num_pop_counts = (num_bits + 63) / 64; - uint64_t sum = 0; - for (int64_t i = 0; i < num_pop_counts; ++i) { - pop_counts[i] = sum; - sum += ARROW_POPCOUNT64(bits[i]); + BitVectorNavigatorImp() : container_(NULLPTR) {} + + BitVectorNavigatorImp(BitVectorWithCountsBase* container, int64_t child_index) + : container_(container), child_index_(child_index) {} + + int64_t block_count() const { + return bit_util::CeilDiv(container_->num_bits_per_child_, + BitVectorWithCountsBase::kBitsPerBlock); + } + + int64_t word_count() const { + return bit_util::CeilDiv(container_->num_bits_per_child_, + BitVectorWithCountsBase::kBitsPerWord); + } + + int64_t bit_count() const { return container_->num_bits_per_child_; } + + int64_t pop_count() const { + int64_t last_block = block_count() - 1; + int64_t last_word = word_count() - 1; + int num_bits_last_word = + static_cast((bit_count() - 1) % BitVectorWithCountsBase::kBitsPerWord + 1); + uint64_t last_word_mask = ~0ULL >> (64 - num_bits_last_word); + return container_->top_counts_[apply_stride_and_offset(last_block)] + + container_->mid_counts_[apply_stride_and_offset(last_word)] + + ARROW_POPCOUNT64(container_->bits_[apply_stride_and_offset(last_word)] & + last_word_mask); + } + + const uint8_t* GetBytes() const { + return reinterpret_cast(container_->bits_.data()); + } + + void BuildMidCounts(int64_t block_index) { + ARROW_DCHECK(block_index >= 0 && + block_index < static_cast(container_->mid_counts_.size())); + constexpr int64_t words_per_block = + BitVectorWithCountsBase::kBitsPerBlock / BitVectorWithCountsBase::kBitsPerWord; + int64_t word_begin = block_index * words_per_block; + int64_t word_end = std::min(word_count(), word_begin + words_per_block); + + const uint64_t* words = 
container_->bits_.data(); + uint16_t* counters = container_->mid_counts_.data(); + + uint16_t count = 0; + for (int64_t word_index = word_begin; word_index < word_end; ++word_index) { + counters[apply_stride_and_offset(word_index)] = count; + count += static_cast( + ARROW_POPCOUNT64(words[apply_stride_and_offset(word_index)])); } - return sum; } - // O(1) - static inline int64_t PopCount(int64_t num_bits, const uint64_t* bitvec, - const uint64_t* popcounts) { - int64_t last_word = (num_bits - 1) / 64; - return popcounts[last_word] + ARROW_POPCOUNT64(bitvec[last_word]); + void BuildTopCounts(int64_t block_index_begin, int64_t block_index_end, + int64_t initial_count = 0) { + const uint64_t* words = container_->bits_.data(); + int64_t* counters = container_->top_counts_.data(); + const uint16_t* mid_counters = container_->mid_counts_.data(); + + int64_t count = initial_count; + + for (int64_t block_index = block_index_begin; block_index < block_index_end - 1; + ++block_index) { + counters[apply_stride_and_offset(block_index)] = count; + + constexpr int64_t words_per_block = + BitVectorWithCountsBase::kBitsPerBlock / BitVectorWithCountsBase::kBitsPerWord; + + int64_t word_begin = block_index * words_per_block; + int64_t word_end = std::min(word_count(), word_begin + words_per_block); + + count += mid_counters[apply_stride_and_offset(word_end - 1)]; + count += ARROW_POPCOUNT64(words[apply_stride_and_offset(word_end - 1)]); + } + counters[apply_stride_and_offset(block_index_end - 1)] = count; } - // O(log(N)) - // The output is set to -1 if rank is below zero and to num_bits if - // rank is above the maximum rank of any row in the represented range. - static inline int64_t Select(int64_t rank, int64_t num_bits, const uint64_t* bits, - const uint64_t* pop_counts) { + // Position of the nth bit set (input argument zero corresponds to the first + // bit set). 
+ // + int64_t Select(int64_t rank) const { if (rank < 0) { - return -1LL; - } - int64_t max_rank = PopCount(num_bits, bits, pop_counts) - 1LL; - if (rank > max_rank) { - return num_bits; - } - - int64_t num_pop_counts = (num_bits + 63) / 64; - // Find index of 64-bit block that contains the nth set bit. - int64_t block_id = (std::upper_bound(pop_counts, pop_counts + num_pop_counts, - static_cast(rank)) - - pop_counts) - - 1; - // Binary search position of (n - pop_count + 1)th bit set in the 64-bit - // block. - uint64_t block = bits[block_id]; - int64_t bit_rank = rank - pop_counts[block_id]; - int bit_id = 0; - for (int half_bits = 32; half_bits >= 1; half_bits /= 2) { - uint64_t mask = ((1ULL << half_bits) - 1ULL); - int64_t lower_half_pop_count = ARROW_POPCOUNT64(block & mask); - if (bit_rank >= lower_half_pop_count) { - block >>= half_bits; - bit_rank -= lower_half_pop_count; - bit_id += half_bits; + return BeforeFirstBit(); + } + if (rank >= pop_count()) { + return AfterLastBit(); + } + + constexpr int64_t bits_per_block = BitVectorWithCountsBase::kBitsPerBlock; + constexpr int64_t bits_per_word = BitVectorWithCountsBase::kBitsPerWord; + constexpr int64_t words_per_block = bits_per_block / bits_per_word; + const int64_t* top_counters = container_->top_counts_.data(); + const uint16_t* mid_counters = container_->mid_counts_.data(); + const uint64_t* words = container_->bits_.data(); + + // Binary search in top level counters. + // + // Equivalent of std::upper_bound() - 1, but not using iterators. + // + int64_t begin = 0; + int64_t end = block_count(); + while (end - begin > 1) { + int64_t middle = (begin + end) / 2; + int reject_left_half = + (rank >= top_counters[apply_stride_and_offset(middle)]) ? 
1 : 0; + begin = begin + (middle - begin) * reject_left_half; + end = middle + (end - middle) * reject_left_half; + } + + int64_t block_index = begin; + rank -= top_counters[apply_stride_and_offset(begin)]; + + // Continue with binary search in intermediate level counters of the + // selected block. + // + begin = block_index * words_per_block; + end = std::min(word_count(), begin + words_per_block); + while (end - begin > 1) { + int64_t middle = (begin + end) / 2; + int reject_left_half = + (rank >= mid_counters[apply_stride_and_offset(middle)]) ? 1 : 0; + begin = begin + (middle - begin) * reject_left_half; + end = middle + (end - middle) * reject_left_half; + } + + int64_t word_index = begin; + rank -= mid_counters[apply_stride_and_offset(begin)]; + + // Continue with binary search in the selected word. + // + uint64_t word = words[apply_stride_and_offset(word_index)]; + int pop_count_prefix = 0; + int bit_count_prefix = 0; + const uint64_t masks[6] = {0xFFFFFFFFULL, 0xFFFFULL, 0xFFULL, 0xFULL, 0x3ULL, 0x1ULL}; + int bit_count_left_half = 32; + for (int i = 0; i < 6; ++i) { + int pop_count_left_half = + static_cast(ARROW_POPCOUNT64((word >> bit_count_prefix) & masks[i])); + int reject_left_half = (rank >= pop_count_prefix + pop_count_left_half) ? 1 : 0; + pop_count_prefix += reject_left_half * pop_count_left_half; + bit_count_prefix += reject_left_half * bit_count_left_half; + bit_count_left_half /= 2; + } + + return word_index * bits_per_word + bit_count_prefix; + } + + void Select(int64_t rank_begin, int64_t rank_end, int64_t* selects, + const ThreadContext& thread_ctx) const { + ARROW_DCHECK(rank_begin <= rank_end); + + // For ranks out of the range represented in the bit vector return + // BeforeFirstBit() or AfterLastBit(). 
+ // + if (rank_begin < 0) { + int64_t num_ranks_to_skip = + std::min(rank_end, static_cast(0)) - rank_begin; + for (int64_t i = 0LL; i < num_ranks_to_skip; ++i) { + selects[i] = BeforeFirstBit(); + } + selects += num_ranks_to_skip; + rank_begin += num_ranks_to_skip; + } + + int64_t rank_max = pop_count() - 1; + if (rank_end > rank_max + 1) { + int64_t num_ranks_to_skip = rank_end - std::max(rank_begin, rank_max + 1); + for (int64_t i = 0LL; i < num_ranks_to_skip; ++i) { + selects[rank_end - num_ranks_to_skip + i] = AfterLastBit(); + } + rank_end -= num_ranks_to_skip; + } + + // If there are no more ranks left then we are done. + // + if (rank_begin == rank_end) { + return; + } + + auto temp_vector_stack = thread_ctx.temp_vector_stack; // For TEMP_VECTOR + TEMP_VECTOR(uint16_t, ids); + int num_ids; + TEMP_VECTOR(uint64_t, temp_words); + + int64_t select_begin = Select(rank_begin); + int64_t select_end = Select(rank_end - 1) + 1; + + constexpr int64_t bits_per_word = BitVectorWithCountsBase::kBitsPerWord; + const uint64_t* words = container_->bits_.data(); + + // Split processing into mini batches, in order to use small buffers on + // the stack (and in CPU cache) for intermediate vectors. + // + BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, select_end - select_begin) + + int64_t bit_begin = select_begin + batch_begin; + int64_t word_begin = bit_begin / bits_per_word; + int64_t word_end = + (select_begin + batch_begin + batch_length - 1) / bits_per_word + 1; + + // Copy words from interleaved bit vector to the temporary buffer that will + // have them in a contiguous block of memory. 
+ // + for (int64_t word_index = word_begin; word_index < word_end; ++word_index) { + temp_words[word_index - word_begin] = words[apply_stride_and_offset(word_index)]; + } + + // Find positions of all bits set in current mini-batch of bits + // + util::bit_util::bits_to_indexes( + /*bit_to_search=*/1, thread_ctx.hardware_flags, static_cast(batch_length), + reinterpret_cast(temp_words), &num_ids, ids, + static_cast(bit_begin % bits_per_word)); + + // Output positions of bits set. + // + for (int i = 0; i < num_ids; ++i) { + selects[i] = bit_begin + ids[i]; + } + selects += num_ids; + + END_MINI_BATCH_FOR + } + + template + int64_t RankImp(int64_t bit_index) const { + const int64_t* top_counters = container_->top_counts_.data(); + const uint16_t* mid_counters = container_->mid_counts_.data(); + const uint64_t* words = container_->bits_.data(); + constexpr int64_t bits_per_block = BitVectorWithCountsBase::kBitsPerBlock; + constexpr int64_t bits_per_word = BitVectorWithCountsBase::kBitsPerWord; + uint64_t bit_mask = INCLUSIVE_RANK + ? (~0ULL >> (bits_per_word - 1 - (bit_index % bits_per_word))) + : ((1ULL << (bit_index % bits_per_word)) - 1ULL); + return top_counters[apply_stride_and_offset(bit_index / bits_per_block)] + + mid_counters[apply_stride_and_offset(bit_index / bits_per_word)] + + ARROW_POPCOUNT64(words[apply_stride_and_offset(bit_index / bits_per_word)] & + bit_mask); + } + + // Number of bits in the range [0, bit_index - 1] that are set. 
+ // + int64_t Rank(int64_t bit_index) const { + return RankImp(bit_index); + } + + void Rank(int64_t bit_index_begin, int64_t bit_index_end, int64_t* ranks) const { + const uint64_t* words = container_->bits_.data(); + constexpr int64_t bits_per_word = BitVectorWithCountsBase::kBitsPerWord; + + int64_t rank = Rank(bit_index_begin); + uint64_t word = words[apply_stride_and_offset(bit_index_begin / bits_per_word)]; + for (int64_t bit_index = bit_index_begin; bit_index < bit_index_end; ++bit_index) { + if (bit_index % bits_per_word == 0) { + word = words[apply_stride_and_offset(bit_index / bits_per_word)]; } + ranks[bit_index - bit_index_begin] = rank; + rank += (word >> (bit_index % bits_per_word)) & 1; } - return block_id * 64 + bit_id; } - // TODO: We could implement BitVectorNavigator::Select that works on batches - // instead of single rows. Then it could use precomputed static B-tree to - // speed up binary search. + // Number of bits in the range [0, bit_index] that are set. // + int64_t RankNext(int64_t bit_index) const { + return RankImp(bit_index); + } + + uint64_t GetBit(int64_t bit_index) const { + constexpr int64_t bits_per_word = BitVectorWithCountsBase::kBitsPerWord; + return (GetWord(bit_index / bits_per_word) >> (bit_index % bits_per_word)) & 1ULL; + } - // O(1) - // Input row number must be valid (between 0 and number of rows less 1). - static inline int64_t Rank(int64_t pos, const uint64_t* bits, - const uint64_t* pop_counts) { - int64_t block = pos >> 6; - int offset = static_cast(pos & 63LL); - uint64_t mask = (1ULL << offset) - 1ULL; - int64_t rank1 = - static_cast(pop_counts[block]) + ARROW_POPCOUNT64(bits[block] & mask); - return rank1; - } - - // O(1) - // Rank of the next row (also valid for the last row when the next row would - // be outside of the range of rows). 
- static inline int64_t RankNext(int64_t pos, const uint64_t* bits, - const uint64_t* pop_counts) { - int64_t block = pos >> 6; - int offset = static_cast(pos & 63LL); - uint64_t mask = ~0ULL >> (63 - offset); - int64_t rank1 = - static_cast(pop_counts[block]) + ARROW_POPCOUNT64(bits[block] & mask); - return rank1; - } - - // Input ranks may be outside of range of ranks present in the input bit - // vector. + uint64_t GetWord(int64_t word_index) const { + const uint64_t* words = container_->bits_.data(); + return words[apply_stride_and_offset(word_index)]; + } + + void SetBit(int64_t bit_index) { + constexpr int64_t bits_per_word = BitVectorWithCountsBase::kBitsPerWord; + int64_t word_index = bit_index / bits_per_word; + SetWord(word_index, GetWord(word_index) | (1ULL << (bit_index % bits_per_word))); + } + + void SetWord(int64_t word_index, uint64_t word_value) { + uint64_t* words = container_->bits_.data(); + words[apply_stride_and_offset(word_index)] = word_value; + } + + // Constants returned from select query when the rank is outside of the + // range of ranks represented in the bit vector. + // + int64_t BeforeFirstBit() const { return -1LL; } + + int64_t AfterLastBit() const { return bit_count(); } + + // Populate bit vector and counters marking the first position in each group + // of ties for the sequence of values. 
// - static void SelectsForRangeOfRanks(int64_t rank_begin, int64_t rank_end, - int64_t num_bits, const uint64_t* bitvec, - const uint64_t* popcounts, int64_t* outputs, - int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack); - - static void SelectsForRelativeRanksForRangeOfRows( - int64_t batch_begin, int64_t batch_end, int64_t rank_delta, int64_t num_rows, - const uint64_t* ties_bitvec, const uint64_t* ties_popcounts, int64_t* outputs, - int64_t hardware_flags, util::TempVectorStack* temp_vector_stack); - - template - static void GenSelectedIds(int64_t num_rows, const uint64_t* bitvec, INDEX_T* ids, - int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) { - // Break into mini-batches. + template + void MarkTieBegins(int64_t length, const T* sorted) { + container_->Resize(length); + + // We start from position 1, in order to not check (i==0) condition inside + // the loop. First position always starts a new group. // - int64_t batch_length_max = util::MiniBatch::kMiniBatchLength; - auto batch_ids_buf = - util::TempVectorHolder(temp_vector_stack, batch_length_max); - auto batch_ids = batch_ids_buf.mutable_data(); - int batch_num_ids; - int64_t num_ids = 0; - for (int64_t batch_begin = 0; batch_begin < num_rows; - batch_begin += batch_length_max) { - int64_t batch_length = std::min(num_rows - batch_begin, batch_length_max); - util::bit_util::bits_to_indexes( - /*bit_to_search=*/1, hardware_flags, batch_length, - reinterpret_cast(bitvec + (batch_begin / 64)), &batch_num_ids, - batch_ids); - for (int i = 0; i < batch_num_ids; ++i) { - ids[num_ids + i] = static_cast(batch_begin + batch_ids[i]); + uint64_t word = 1ULL; + for (int64_t i = 1; i < length; ++i) { + uint64_t bit_value = (sorted[i - 1] != sorted[i]) ? 
1ULL : 0ULL; + word |= bit_value << (i & 63); + if ((i & 63) == 63) { + SetWord(i / 64, word); + word = 0ULL; } - num_ids += batch_num_ids; } + if (length % 64 > 0) { + SetWord(length / 64, word); + } + + // Generate population counters for the bit vector. + // + for (int64_t block_index = 0; block_index < block_count(); ++block_index) { + BuildMidCounts(block_index); + } + BuildTopCounts(0, block_count()); } + + void DebugPrintCountersToFile(FILE* fout) const { + int64_t num_words = bit_util::CeilDiv(container_->num_bits_per_child_, + BitVectorWithCountsBase::kBitsPerWord); + int64_t num_blocks = bit_util::CeilDiv(container_->num_bits_per_child_, + BitVectorWithCountsBase::kBitsPerBlock); + fprintf(fout, "\nmid_counts: "); + for (int64_t word_index = 0; word_index < num_words; ++word_index) { + fprintf( + fout, "%d ", + static_cast(container_->mid_counts_[apply_stride_and_offset(word_index)])); + } + fprintf(fout, "\ntop_counts: "); + for (int64_t block_index = 0; block_index < num_blocks; ++block_index) { + fprintf(fout, "%d ", + static_cast( + container_->top_counts_[apply_stride_and_offset(block_index)])); + } + } + + private: + int64_t apply_stride_and_offset(int64_t index) const { + if (SINGLE_CHILD_BIT_VECTOR) { + return index; + } + int64_t stride = container_->num_children_; + int64_t offset = child_index_; + return index * stride + offset; + } + + BitVectorWithCountsBase* container_; + int64_t child_index_; +}; + +using BitVectorNavigator = BitVectorNavigatorImp; +using BitWeaverNavigator = BitVectorNavigatorImp; + +class BitVectorWithCounts : public BitVectorWithCountsBase { + public: + BitVectorNavigator GetNavigator() { + ARROW_DCHECK(num_children_ == 1); + return BitVectorNavigator(this, 0); + } + BitWeaverNavigator GetChildNavigator(int64_t child_index) { + ARROW_DCHECK(child_index >= 0 && child_index < num_children_); + return BitWeaverNavigator(this, child_index); + } +}; + +class BitMatrixWithCounts { + public: + ~BitMatrixWithCounts() { + for 
(size_t i = 0; i < bands_.size(); ++i) { + if (bands_[i]) { + delete bands_[i]; + } + } + } + + BitMatrixWithCounts() : band_size_(0), bit_count_(0), num_rows_allocated_(0) {} + + void Init(int band_size, int64_t bit_count) { + ARROW_DCHECK(band_size > 0 && bit_count > 0); + ARROW_DCHECK(band_size_ == 0); + band_size_ = band_size; + bit_count_ = bit_count; + num_rows_allocated_ = 0; + } + + void AddRow(int row_index) { + // Make a room in a lookup table for row with this index if needed. + // + int row_index_end = static_cast(row_navigators_.size()); + if (row_index >= row_index_end) { + row_navigators_.resize(row_index + 1); + } + + // Check if we need to allocate a new band. + // + int num_bands = static_cast(bands_.size()); + if (num_rows_allocated_ == num_bands * band_size_) { + bands_.push_back(new BitVectorWithCountsBase()); + bands_.back()->Resize(bit_count_, band_size_); + } + + // Initialize BitWeaverNavigator for that row. + // + row_navigators_[row_index] = + BitWeaverNavigator(bands_[num_rows_allocated_ / band_size_], + static_cast(num_rows_allocated_ % band_size_)); + + ++num_rows_allocated_; + } + + BitWeaverNavigator& GetMutableRow(int row_index) { return row_navigators_[row_index]; } + + const BitWeaverNavigator& GetRow(int row_index) const { + return row_navigators_[row_index]; + } + + private: + int band_size_; + int64_t bit_count_; + int num_rows_allocated_; + std::vector bands_; + std::vector row_navigators_; }; } // namespace compute diff --git a/cpp/src/arrow/compute/exec/window_functions/merge_tree.cc b/cpp/src/arrow/compute/exec/window_functions/merge_tree.cc index 835a64053a0..d8e2ca1b572 100644 --- a/cpp/src/arrow/compute/exec/window_functions/merge_tree.cc +++ b/cpp/src/arrow/compute/exec/window_functions/merge_tree.cc @@ -20,273 +20,1065 @@ namespace arrow { namespace compute { -void MergeTree::Build(int64_t num_rows, const int64_t* permutation, - int num_levels_to_skip, int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) 
{ - num_rows_ = num_rows; - if (num_rows == 0) { +bool MergeTree::IsPermutation(int64_t length, const int64_t* values) { + std::vector present(length, false); + for (int64_t i = 0; i < length; ++i) { + auto value = values[i]; + if (value < 0LL || value >= length || present[value]) { + return false; + } + present[value] = true; + } + return true; +} + +int64_t MergeTree::NodeBegin(int level, int64_t pos) const { + return pos & ~((1LL << level) - 1); +} + +int64_t MergeTree::NodeEnd(int level, int64_t pos) const { + return std::min(NodeBegin(level, pos) + (static_cast(1) << level), length_); +} + +void MergeTree::CascadeBegin(int from_level, int64_t begin, int64_t* lbegin, + int64_t* rbegin) const { + ARROW_DCHECK(begin >= 0 && begin < length_); + ARROW_DCHECK(from_level >= 1); + auto& split_bits = bit_matrix_.GetRow(from_level); + auto node_begin = NodeBegin(from_level, begin); + auto node_begin_plus_whole = node_begin + (1LL << from_level); + auto node_begin_plus_half = node_begin + (1LL << (from_level - 1)); + int64_t node_popcnt = split_bits.Rank(begin) - node_begin / 2; + *rbegin = node_begin_plus_half + node_popcnt; + *lbegin = begin - node_popcnt; + *lbegin = + (*lbegin == node_begin_plus_half || *lbegin == length_) ? kEmptyRange : *lbegin; + *rbegin = + (*rbegin == node_begin_plus_whole || *rbegin == length_) ? kEmptyRange : *rbegin; +} + +void MergeTree::CascadeEnd(int from_level, int64_t end, int64_t* lend, + int64_t* rend) const { + ARROW_DCHECK(end > 0 && end <= length_); + ARROW_DCHECK(from_level >= 1); + auto& split_bits = bit_matrix_.GetRow(from_level); + auto node_begin = NodeBegin(from_level, end - 1); + auto node_begin_plus_half = node_begin + (1LL << (from_level - 1)); + int64_t node_popcnt = split_bits.RankNext(end - 1) - node_begin / 2; + *rend = node_begin_plus_half + node_popcnt; + *lend = end - node_popcnt; + *rend = (*rend == node_begin_plus_half) ? kEmptyRange : *rend; + *lend = (*lend == node_begin) ? 
kEmptyRange : *lend; +} + +int64_t MergeTree::CascadePos(int from_level, int64_t pos) const { + ARROW_DCHECK(pos >= 0 && pos < length_); + ARROW_DCHECK(from_level >= 1); + auto& split_bits = bit_matrix_.GetRow(from_level); + auto node_begin = NodeBegin(from_level, pos); + auto node_begin_plus_half = node_begin + (1LL << (from_level - 1)); + int64_t node_popcnt = split_bits.Rank(pos) - node_begin / 2; + return split_bits.GetBit(pos) ? node_begin_plus_half + node_popcnt : pos - node_popcnt; +} + +MergeTree::NodeSubsetType MergeTree::NodeIntersect(int level, int64_t pos, int64_t begin, + int64_t end) { + auto node_begin = NodeBegin(level, pos); + auto node_end = NodeEnd(level, pos); + return (node_begin >= begin && node_end <= end) ? NodeSubsetType::FULL + : (node_begin < end && node_end > begin) ? NodeSubsetType::PARTIAL + : NodeSubsetType::EMPTY; +} + +template +void MergeTree::SplitSubsetImp(const BitWeaverNavigator& split_bits, int source_level, + const T* source_level_vector, T* target_level_vector, + int64_t read_begin, int64_t read_end, + int64_t write_begin_bit0, int64_t write_begin_bit1, + ThreadContext& thread_ctx) { + ARROW_DCHECK(source_level >= 1); + + if (read_end == read_begin) { + return; + } + + int64_t write_begin[2]; + write_begin[0] = write_begin_bit0; + write_begin[1] = write_begin_bit1; + int64_t write_offset[2]; + write_offset[0] = write_offset[1] = 0; + int target_level = source_level - 1; + int64_t target_node_mask = (1LL << target_level) - 1LL; + if (MULTIPLE_SOURCE_NODES) { + // In case of processing multiple input nodes, + // we must align write_begin to the target level node boundary, + // so that the target node index calculation inside the main loop behaves + // correctly. 
+ // + write_offset[0] = write_begin[0] & target_node_mask; + write_offset[1] = write_begin[1] & target_node_mask; + write_begin[0] &= ~target_node_mask; + write_begin[1] &= ~target_node_mask; + } + + uint64_t split_bits_batch[util::MiniBatch::kMiniBatchLength / 64 + 1]; + int num_ids_batch; + auto temp_vector_stack = thread_ctx.temp_vector_stack; + TEMP_VECTOR(uint16_t, ids_batch); + + // Split processing into mini batches, in order to use small buffers on + // the stack (and in CPU cache) for intermediate vectors. + // + BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, read_end - read_begin) + + // Copy bit vector words related to the current batch on the stack. + // + // Bit vector words from multiple levels are interleaved in memory, that + // is why we make a copy here to form a contiguous block. + // + int64_t word_index_base = (read_begin + batch_begin) / 64; + for (int64_t word_index = word_index_base; + word_index <= (read_begin + (batch_begin + batch_length) - 1) / 64; ++word_index) { + split_bits_batch[word_index - word_index_base] = split_bits.GetWord(word_index); + } + + for (int bit = 0; bit <= 1; ++bit) { + // Convert bits to lists of bit indices for each bit value. + // + util::bit_util::bits_to_indexes( + bit, thread_ctx.hardware_flags, static_cast(batch_length), + reinterpret_cast(split_bits_batch), &num_ids_batch, ids_batch, + /*bit_offset=*/(read_begin + batch_begin) % 64); + + // For each bit index on the list, calculate position in the input array + // and position in the output array, then make a copy of the value. + // + for (int64_t i = 0; i < num_ids_batch; ++i) { + int64_t read_pos = read_begin + batch_begin + ids_batch[i]; + int64_t write_pos = write_offset[bit] + i; + if (MULTIPLE_SOURCE_NODES) { + // We may need to jump from one target node to the next in case of + // processing multiple source nodes. 
+ // Update write position accordingly + // + write_pos = write_pos + (write_pos & ~target_node_mask); + } + write_pos += write_begin[bit]; + target_level_vector[write_pos] = source_level_vector[read_pos]; + } + + // Advance the write cursor for current bit value (bit 0 or 1). + // + write_offset[bit] += num_ids_batch; + } + + END_MINI_BATCH_FOR +} + +template +void MergeTree::SplitSubset(int source_level, const T* source_level_vector, + T* target_level_vector, int64_t read_begin, int64_t read_end, + ThreadContext& thread_ctx) { + auto& split_bits = bit_matrix_.GetRow(source_level); + int64_t source_node_length = (1LL << source_level); + bool single_node = (read_end - read_begin) <= source_node_length; + + // Calculate initial output positions for bits 0 and bits 1 respectively + // and call a helper function to do the remaining processing. + // + int64_t source_node_begin = NodeBegin(source_level, read_begin); + int64_t target_node_length = (1LL << (source_level - 1)); + int64_t write_begin[2]; + write_begin[1] = split_bits.Rank(read_begin); + write_begin[0] = read_begin - write_begin[1]; + write_begin[0] += source_node_begin / 2; + write_begin[1] += source_node_begin / 2 + target_node_length; + + if (single_node) { + // The case when the entire input subset is contained within a single + // node in the source level. 
+ // + SplitSubsetImp(split_bits, source_level, source_level_vector, + target_level_vector, read_begin, read_end, write_begin[0], + write_begin[1], thread_ctx); + } else { + SplitSubsetImp(split_bits, source_level, source_level_vector, + target_level_vector, read_begin, read_end, write_begin[0], + write_begin[1], thread_ctx); + } +} + +void MergeTree::SetMorselLoglen(int morsel_loglen) { morsel_loglen_ = morsel_loglen; } + +uint64_t MergeTree::GetWordUnaligned(const BitWeaverNavigator& source, int64_t bit_index, + int num_bits) { + ARROW_DCHECK(num_bits > 0 && num_bits <= 64); + int64_t word_index = bit_index / 64; + int64_t word_offset = bit_index % 64; + uint64_t word = source.GetWord(word_index) >> word_offset; + if (word_offset + num_bits > 64) { + word |= source.GetWord(word_index + 1) << (64 - word_offset); + } + word &= (~0ULL >> (64 - num_bits)); + return word; +} + +void MergeTree::UpdateWord(BitWeaverNavigator& target, int64_t bit_index, int num_bits, + uint64_t bits) { + ARROW_DCHECK(num_bits > 0 && num_bits <= 64); + ARROW_DCHECK(bit_index % 64 + num_bits <= 64); + int64_t word_index = bit_index / 64; + int64_t word_offset = bit_index % 64; + uint64_t mask = (~0ULL >> (64 - num_bits)) << word_offset; + bits = ((bits << word_offset) & mask); + target.SetWord(word_index, (target.GetWord(word_index) & ~mask) | bits); +} + +void MergeTree::BitMemcpy(const BitWeaverNavigator& source, BitWeaverNavigator& target, + int64_t source_begin, int64_t source_end, + int64_t target_begin) { + int64_t num_bits = source_end - source_begin; + if (num_bits == 0) { return; } - int height = 1 + arrow::bit_util::Log2(num_rows); - level_bitvecs_.resize(height); - level_popcounts_.resize(height); + int64_t target_end = target_begin + num_bits; + int64_t target_word_begin = target_begin / 64; + int64_t target_word_end = (target_end - 1) / 64 + 1; + int64_t target_offset = target_begin % 64; - int64_t num_bit_words = arrow::bit_util::CeilDiv(num_rows, 64); + // Process the first 
and the last target word. + // + if (target_word_end - target_word_begin == 1) { + // There is only one output word + // + uint64_t input = GetWordUnaligned(source, source_begin, static_cast(num_bits)); + UpdateWord(target, target_begin, static_cast(num_bits), input); + return; + } else { + // First output word + // + int num_bits_first_word = static_cast(64 - target_offset); + uint64_t input = GetWordUnaligned(source, source_begin, num_bits_first_word); + UpdateWord(target, target_begin, num_bits_first_word, input); + + // Last output word + // + int num_bits_last_word = (target_end % 64 == 0) ? 64 : (target_end % 64); + input = GetWordUnaligned(source, source_end - num_bits_last_word, num_bits_last_word); + UpdateWord(target, target_end - num_bits_last_word, num_bits_last_word, input); + } - // We skip level 0 on purpose - it is not used. - // We also skip num_levels_to_skip from the top. + // Index of source word containing the last bit that needs to be copied to + // the first target word. 
// - for (int level = 1; level < height - num_levels_to_skip; ++level) { - level_bitvecs_[level].resize(num_bit_words); - level_popcounts_[level].resize(num_bit_words); + int64_t source_word_begin = + (source_begin + (target_word_begin * 64 + 63) - target_begin) / 64; + + // The case of aligned bit sequences + // + if (target_offset == (source_begin % 64)) { + for (int64_t target_word = target_word_begin + 1; target_word < target_word_end - 1; + ++target_word) { + int64_t source_word = source_word_begin + (target_word - target_word_begin); + target.SetWord(target_word, source.GetWord(source_word)); + } + return; } - std::vector permutation_temp[2]; - permutation_temp[0].resize(num_rows); - permutation_temp[1].resize(num_rows); - int64_t* permutation_pingpong[2]; - permutation_pingpong[0] = permutation_temp[0].data(); - permutation_pingpong[1] = permutation_temp[1].data(); + int64_t first_unprocessed_source_bit = source_begin + (64 - target_offset); + + // Number of bits from a single input word carried from one output word to + // the next + // + int num_carry_bits = 64 - first_unprocessed_source_bit % 64; + ARROW_DCHECK(num_carry_bits > 0 && num_carry_bits < 64); + + // Carried bits + // + uint64_t carry = GetWordUnaligned(source, first_unprocessed_source_bit, num_carry_bits); + + // Process target words between the first and the last. 
+ // + for (int64_t target_word = target_word_begin + 1; target_word < target_word_end - 1; + ++target_word) { + int64_t source_word = source_word_begin + (target_word - target_word_begin); + uint64_t input = source.GetWord(source_word); + uint64_t output = carry | (input << num_carry_bits); + target.SetWord(target_word, output); + carry = input >> (64 - num_carry_bits); + } +} + +void MergeTree::GetChildrenBoundaries(const BitWeaverNavigator& split_bits, + int64_t num_source_nodes, + int64_t* source_node_begins, + int64_t* target_node_begins) { + for (int64_t source_node_index = 0; source_node_index < num_source_nodes; + ++source_node_index) { + int64_t node_begin = source_node_begins[source_node_index]; + int64_t node_end = source_node_begins[source_node_index + 1]; + target_node_begins[2 * source_node_index + 0] = node_begin; + if (node_begin == node_end) { + target_node_begins[2 * source_node_index + 1] = node_begin; + } else { + int64_t num_bits_1 = + split_bits.RankNext(node_end - 1) - split_bits.Rank(node_begin); + int64_t num_bits_0 = (node_end - node_begin) - num_bits_1; + target_node_begins[2 * source_node_index + 1] = node_begin + num_bits_0; + } + } + int64_t num_target_nodes = 2 * num_source_nodes; + target_node_begins[num_target_nodes] = source_node_begins[num_source_nodes]; +} + +void MergeTree::BuildUpperSliceMorsel(int level_begin, int64_t* permutation_of_X, + int64_t* temp_permutation_of_X, + int64_t morsel_index, ThreadContext& thread_ctx) { + int64_t morsel_length = 1LL << morsel_loglen_; + int64_t morsel_begin = morsel_index * morsel_length; + int64_t morsel_end = std::min(length_, morsel_begin + morsel_length); + + ARROW_DCHECK((morsel_begin & (BitVectorWithCounts::kBitsPerBlock - 1)) == 0); + ARROW_DCHECK((morsel_end & (BitVectorWithCounts::kBitsPerBlock - 1)) == 0 || + morsel_end == length_); + ARROW_DCHECK(morsel_end > morsel_begin); + + int level_end = morsel_loglen_; + ARROW_DCHECK(level_begin > level_end); - // Generate tree layers 
top-down + std::vector node_begins[2]; + // Begin level may have multiple nodes but the morsel is contained in + // just one. // - int top_level = height - num_levels_to_skip - 1; - for (int target_level = top_level; target_level > 0; --target_level) { - int flip = target_level % 2; - const int64_t* permutation_up = - (target_level == top_level - 1) ? permutation : permutation_pingpong[flip]; - if (target_level < top_level) { - int64_t* permutation_this = permutation_pingpong[1 - flip]; - Split(target_level + 1, permutation_up, permutation_this, hardware_flags, - temp_vector_stack); + node_begins[0].resize(2); + node_begins[0][0] = morsel_begin; + node_begins[0][1] = morsel_end; + + for (int level = level_begin; level > level_end; --level) { + // Setup pointers to ping-pong buffers (for permutation of X). + // + int64_t* source_Xs; + int64_t* target_Xs; + if ((level_begin - level) % 2 == 0) { + source_Xs = permutation_of_X; + target_Xs = temp_permutation_of_X; + } else { + source_Xs = temp_permutation_of_X; + target_Xs = permutation_of_X; + } + + // Fill the bit vector + // + for (int64_t word_index = morsel_begin / 64; + word_index < bit_util::CeilDiv(morsel_end, 64); ++word_index) { + uint64_t word = 0; + int num_bits = (word_index == (morsel_end / 64)) ? 
(morsel_end % 64) : 64; + for (int i = 0; i < num_bits; ++i) { + int64_t X = source_Xs[word_index * 64 + i]; + uint64_t bit = ((X >> (level - 1)) & 1ULL); + word |= (bit << i); + } + bit_matrix_upper_slices_.GetMutableRow(level).SetWord(word_index, word); + } + + // Fill the population counters + // + int64_t block_index_begin = + (morsel_begin >> BitVectorWithCountsBase::kLogBitsPerBlock); + int64_t block_index_end = + ((morsel_end - 1) >> BitVectorWithCountsBase::kLogBitsPerBlock) + 1; + for (int64_t block_index = block_index_begin; block_index < block_index_end; + ++block_index) { + bit_matrix_upper_slices_.GetMutableRow(level).BuildMidCounts(block_index); + } + bit_matrix_upper_slices_.GetMutableRow(level).BuildTopCounts(block_index_begin, + block_index_end); + + // Setup pointers to ping-pong buffers (for node boundaries from previous + // and current level). + // + int64_t num_source_nodes = (1LL << (level_begin - level)); + int64_t num_target_nodes = 2 * num_source_nodes; + int64_t* source_node_begins; + int64_t* target_node_begins; + if ((level_begin - level) % 2 == 0) { + source_node_begins = node_begins[0].data(); + node_begins[1].resize(num_target_nodes + 1); + target_node_begins = node_begins[1].data(); + } else { + source_node_begins = node_begins[1].data(); + node_begins[0].resize(num_target_nodes + 1); + target_node_begins = node_begins[0].data(); + } + + // Compute boundaries of the children nodes (cumulative sum of children + // sizes). + // + GetChildrenBoundaries(bit_matrix_upper_slices_.GetRow(level), num_source_nodes, + source_node_begins, target_node_begins); + + // Split vector of Xs, one parent node at a time. + // Each parent node gets split into two children nodes. + // Parent and child nodes can have arbitrary sizes, including zero. 
+ // + for (int64_t source_node_index = 0; source_node_index < num_source_nodes; + ++source_node_index) { + SplitSubsetImp( + bit_matrix_upper_slices_.GetRow(level), level, source_Xs, target_Xs, + source_node_begins[source_node_index], + source_node_begins[source_node_index + 1], + target_node_begins[2 * source_node_index + 0], + target_node_begins[2 * source_node_index + 1], thread_ctx); } - const int64_t* permutation_this = - (target_level == top_level) ? permutation : permutation_pingpong[1 - flip]; - GenBitvec(target_level, permutation_this); } } -void MergeTree::RangeQueryStep(int level, int64_t num_queries, const int64_t* begins, - const int64_t* ends, RangeQueryState* query_states, - RangeQueryState* query_outputs) const { - for (int64_t iquery = 0; iquery < num_queries; ++iquery) { - int64_t begin = begins[iquery]; - int64_t end = ends[iquery]; - RangeQueryState& state = query_states[iquery]; - RangeQueryState& output = query_outputs[iquery]; - ARROW_DCHECK(begin <= end && begin >= 0 && end <= num_rows_); - - RangeQueryState parent_state; - parent_state.pos[0] = state.pos[0]; - parent_state.pos[1] = state.pos[1]; - state.pos[0] = state.pos[1] = output.pos[0] = output.pos[1] = RangeQueryState::kEmpty; - - for (int iparent_pos = 0; iparent_pos < 2; ++iparent_pos) { - int64_t parent_pos = parent_state.pos[iparent_pos]; - if (parent_pos != RangeQueryState::kEmpty) { - RangeQueryState child_state; - Cascade(level, parent_pos, &child_state); - for (int ichild_pos = 0; ichild_pos < 2; ++ichild_pos) { - int64_t child_pos = child_state.pos[ichild_pos]; - if (child_pos != RangeQueryState::kEmpty) { - int64_t child_node; - int64_t child_length; - RangeQueryState::NodeAndLengthFromPos(level - 1, child_pos, &child_node, - &child_length); - if (NodeFullyInsideRange(level - 1, child_node, begin, end)) { - output.AppendPos(child_pos); - } else if (NodePartiallyInsideRange(level - 1, child_node, begin, end)) { - state.AppendPos(child_pos); +void 
MergeTree::CombineUpperSlicesMorsel(int level_begin, int64_t output_morsel, + int64_t* input_permutation_of_X, + int64_t* output_permutation_of_X, + ThreadContext& thread_ctx) { + int level_end = morsel_loglen_; + ARROW_DCHECK(level_begin > level_end); + + int64_t morsel_length = 1LL << morsel_loglen_; + int64_t output_morsel_begin = output_morsel * morsel_length; + int64_t output_morsel_end = std::min(length_, output_morsel_begin + morsel_length); + + int64_t begin_level_node_length = (1LL << level_begin); + + // Copy bits for begin level bit vector. + // + ARROW_DCHECK(output_morsel_begin % 64 == 0); + for (int64_t word_index = output_morsel_begin / 64; + word_index <= (output_morsel_end - 1) / 64; ++word_index) { + bit_matrix_.GetMutableRow(level_begin) + .SetWord(word_index, + bit_matrix_upper_slices_.GetRow(level_begin).GetWord(word_index)); + } + + // For each node of the top level + // (every input morsel is contained in one such node): + // + for (int64_t begin_level_node = 0; + begin_level_node < bit_util::CeilDiv(length_, begin_level_node_length); + ++begin_level_node) { + int64_t begin_level_node_begin = begin_level_node * begin_level_node_length; + int64_t begin_level_node_end = + std::min(length_, begin_level_node_begin + begin_level_node_length); + + int64_t num_input_morsels = + bit_util::CeilDiv(begin_level_node_end - begin_level_node_begin, morsel_length); + + std::vector slice_node_begins[2]; + for (int64_t input_morsel = 0; input_morsel < num_input_morsels; ++input_morsel) { + slice_node_begins[0].push_back(begin_level_node_begin + + input_morsel * morsel_length); + } + slice_node_begins[0].push_back(begin_level_node_end); + + for (int level = level_begin - 1; level >= level_end; --level) { + std::vector* parent_node_begins; + std::vector* child_node_begins; + if ((level_begin - level) % 2 == 1) { + parent_node_begins = &slice_node_begins[0]; + child_node_begins = &slice_node_begins[1]; + } else { + parent_node_begins = &slice_node_begins[1]; + 
child_node_begins = &slice_node_begins[0]; + } + child_node_begins->resize((parent_node_begins->size() - 1) * 2 + 1); + + GetChildrenBoundaries(bit_matrix_upper_slices_.GetRow(level + 1), + static_cast(parent_node_begins->size()) - 1, + parent_node_begins->data(), child_node_begins->data()); + + // Scan all output nodes and all input nodes for each of them. + // + // Filter to the subset of input-output node pairs that cross the output + // morsel boundary. + // + int64_t num_output_nodes = (1LL << (level_begin - level)); + for (int64_t output_node = 0; output_node < num_output_nodes; ++output_node) { + int64_t output_node_length = 1LL << level; + int64_t output_begin = begin_level_node_begin + output_node * output_node_length; + for (int64_t input_morsel = 0; input_morsel < num_input_morsels; ++input_morsel) { + // Boundaries of the input node for a given input morsel and a given + // output node. + // + int64_t input_begin = + (*child_node_begins)[input_morsel * num_output_nodes + output_node]; + int64_t input_end = + (*child_node_begins)[input_morsel * num_output_nodes + output_node + 1]; + int64_t input_length = input_end - input_begin; + if (output_morsel_end > output_begin && + output_morsel_begin < output_begin + input_length) { + // Clamp the copy request to have the output range within the output + // morsel. + // + int64_t target_begin = std::max(output_morsel_begin, output_begin); + int64_t target_end = std::min(output_morsel_end, output_begin + input_length); + + if (level == level_end) { + // Reorder chunks of vector of X for level_end. + // + memcpy(output_permutation_of_X + target_begin, + input_permutation_of_X + input_begin + (target_begin - output_begin), + (target_end - target_begin) * sizeof(input_permutation_of_X[0])); + } else { + // Reorder bits in the split bit vector for all levels above + // level_end. 
+ // + BitMemcpy(bit_matrix_upper_slices_.GetRow(level), + bit_matrix_.GetMutableRow(level), + input_begin + (target_begin - output_begin), + input_begin + (target_end - output_begin), target_begin); } } + + // Advance write cursor + // + output_begin += input_length; } } } } -} -void MergeTree::NthElement(int64_t num_queries, const uint16_t* opt_ids, - const int64_t* begins, const int64_t* ends, - /* ns[i] must be in the range [0; ends[i] - begins[i]) */ - const int64_t* ns, int64_t* row_numbers, - util::TempVectorStack* temp_vector_stack) const { - int64_t batch_length_max = util::MiniBatch::kMiniBatchLength; - - // Allocate temporary buffers + // Fill the mid level population counters for bit vectors. + // + // Top level population counters will get initialized in a single-threaded + // section at the end of the build process. // - auto temp_begins_buf = util::TempVectorHolder( - temp_vector_stack, static_cast(batch_length_max)); - int64_t* temp_begins = temp_begins_buf.mutable_data(); + ARROW_DCHECK(output_morsel_begin % (BitVectorWithCounts::kBitsPerBlock) == 0); + int64_t block_index_begin = (output_morsel_begin / BitVectorWithCounts::kBitsPerBlock); + int64_t block_index_end = + ((output_morsel_end - 1) / BitVectorWithCounts::kBitsPerBlock) + 1; - auto temp_ends_buf = util::TempVectorHolder( - temp_vector_stack, static_cast(batch_length_max)); - int64_t* temp_ends = temp_ends_buf.mutable_data(); + for (int level = level_begin; level > level_end; --level) { + for (int64_t block_index = block_index_begin; block_index < block_index_end; + ++block_index) { + bit_matrix_.GetMutableRow(level).BuildMidCounts(block_index); + } + } +} - auto temp_ns_buf = util::TempVectorHolder( - temp_vector_stack, static_cast(batch_length_max)); - int64_t* temp_ns = temp_ns_buf.mutable_data(); +void MergeTree::BuildLower(int level_begin, int64_t morsel_index, + int64_t* begin_permutation_of_X, + int64_t* temp_permutation_of_X, ThreadContext& thread_ctx) { + int64_t morsel_length = 
1LL << morsel_loglen_; + int64_t morsel_begin = morsel_index * morsel_length; + int64_t morsel_end = std::min(length_, morsel_begin + morsel_length); + int64_t begin_level_node_length = 1LL << level_begin; + ARROW_DCHECK(morsel_begin % begin_level_node_length == 0 && + (morsel_end % begin_level_node_length == 0 || morsel_end == length_)); - for (int64_t batch_begin = 0; batch_begin < num_queries; - batch_begin += batch_length_max) { - int64_t batch_length = std::min(num_queries - batch_begin, batch_length_max); + int64_t* permutation_of_X[2]; + permutation_of_X[0] = begin_permutation_of_X; + permutation_of_X[1] = temp_permutation_of_X; - // Initialize tree cursors (begin and end of a range of some top level - // node for each query/frame). + for (int level = level_begin; level > 0; --level) { + int selector = (level_begin - level) % 2; + const int64_t* input_X = permutation_of_X[selector]; + int64_t* output_X = permutation_of_X[1 - selector]; + + // Populate bit vector for current level based on (level - 1) bits of X in + // the input vector. 
// - if (opt_ids) { - for (int64_t i = 0; i < batch_length; ++i) { - uint16_t id = opt_ids[batch_begin + i]; - temp_begins[i] = begins[id]; - temp_ends[i] = ends[id]; - temp_ns[i] = ns[id]; - ARROW_DCHECK(temp_ns[i] >= 0 && temp_ns[i] < temp_ends[i] - temp_begins[i]); + ARROW_DCHECK(morsel_begin % 64 == 0); + uint64_t word = 0ULL; + for (int64_t i = morsel_begin; i < morsel_end; ++i) { + word |= ((input_X[i] >> (level - 1)) & 1ULL) << (i % 64); + if (i % 64 == 63) { + bit_matrix_.GetMutableRow(level).SetWord(i / 64, word); + word = 0ULL; } - } else { - memcpy(temp_begins, begins + batch_begin, batch_length * sizeof(temp_begins[0])); - memcpy(temp_ends, ends + batch_begin, batch_length * sizeof(temp_ends[0])); - memcpy(temp_ns, ns + batch_begin, batch_length * sizeof(temp_ns[0])); + } + if (morsel_end % 64 > 0) { + bit_matrix_.GetMutableRow(level).SetWord(morsel_end / 64, word); } - // Traverse the tree top-down + // Fill population counters for bit vector. // - int top_level = static_cast(level_bitvecs_.size()) - 1; - for (int level = top_level; level > 0; --level) { - for (int64_t i = 0; i < batch_length; ++i) { - NthElementStep(level, temp_begins + i, temp_ends + i, temp_ns + i); - } + constexpr int64_t block_size = BitVectorWithCounts::kBitsPerBlock; + int64_t block_index_begin = morsel_begin / block_size; + int64_t block_index_end = (morsel_end - 1) / block_size + 1; + for (int64_t block_index = block_index_begin; block_index < block_index_end; + ++block_index) { + bit_matrix_.GetMutableRow(level).BuildMidCounts(block_index); + } + bit_matrix_.GetMutableRow(level).BuildTopCounts(block_index_begin, block_index_end, + morsel_begin / 2); + + // Split X based on the generated bit vector. 
+ // + SplitSubset(level, input_X, output_X, morsel_begin, morsel_end, thread_ctx); + } +} + +Status MergeTree::Build(int64_t length, int level_begin, int64_t* permutation_of_X, + ParallelForStream& parallel_fors) { + morsel_loglen_ = kMinMorselLoglen; + length_ = length; + temp_permutation_of_X_.resize(length); + + // Allocate matrix bits. + // + int upper_slices_level_end = morsel_loglen_; + int num_upper_levels = std::max(0, level_begin - upper_slices_level_end); + bit_matrix_.Init(kBitMatrixBandSize, length); + for (int level = 1; level <= level_begin; ++level) { + bit_matrix_.AddRow(level); + } + bit_matrix_upper_slices_.Init(kBitMatrixBandSize, length); + for (int level = upper_slices_level_end + 1; level <= level_begin; ++level) { + bit_matrix_upper_slices_.AddRow(level); + } + + int64_t num_morsels = bit_util::CeilDiv(length_, 1LL << morsel_loglen_); + + // Upper slices of merge tree are generated for levels for which the size of + // each node is greater than a single morsel. + // + // If there are such levels, then add parallel for loops that create upper + // slices and then combine them. + // + if (num_upper_levels > 0) { + parallel_fors.InsertParallelFor( + num_morsels, + [this, level_begin, permutation_of_X](int64_t morsel_index, + ThreadContext& thread_context) -> Status { + BuildUpperSliceMorsel(level_begin, permutation_of_X, + temp_permutation_of_X_.data(), morsel_index, + thread_context); + return Status::OK(); + }); + parallel_fors.InsertParallelFor( + num_morsels, + [this, level_begin, num_upper_levels, permutation_of_X]( + int64_t morsel_index, ThreadContext& thread_context) -> Status { + CombineUpperSlicesMorsel( + level_begin, morsel_index, + (num_upper_levels % 2 == 0) ? permutation_of_X + : temp_permutation_of_X_.data(), + (num_upper_levels % 2 == 0) ? 
temp_permutation_of_X_.data() + : permutation_of_X, + thread_context); + return Status::OK(); + }); + } + parallel_fors.InsertParallelFor( + num_morsels, + [this, level_begin, num_upper_levels, upper_slices_level_end, permutation_of_X]( + int64_t morsel_index, ThreadContext& thread_context) -> Status { + BuildLower(std::min(level_begin, upper_slices_level_end), morsel_index, + (num_upper_levels > 0 && (num_upper_levels % 2 == 0)) + ? temp_permutation_of_X_.data() + : permutation_of_X, + (num_upper_levels > 0 && (num_upper_levels % 2 == 0)) + ? permutation_of_X + : temp_permutation_of_X_.data(), + thread_context); + return Status::OK(); + }); + parallel_fors.InsertTaskSingle( + [this, level_begin](int64_t morsel_index, ThreadContext& thread_context) -> Status { + // Fill the top level population counters for upper level bit vectors. + // + int level_end = morsel_loglen_; + int64_t num_blocks = + bit_util::CeilDiv(length_, BitVectorWithCountsBase::kBitsPerBlock); + for (int level = level_begin; level > level_end; --level) { + bit_matrix_.GetMutableRow(level).BuildTopCounts(0, num_blocks, 0); + } + + // Release the pair of temporary vectors representing permutation of + // X. + // + std::vector().swap(temp_permutation_of_X_); + + return Status::OK(); + }); + + return Status::OK(); +} + +void MergeTree::BoxQuery(const BoxQueryRequest& queries, ThreadContext& thread_ctx) { + auto temp_vector_stack = thread_ctx.temp_vector_stack; // For TEMP_VECTOR + TEMP_VECTOR(int64_t, partial_results0); + TEMP_VECTOR(int64_t, partial_results1); + TEMP_VECTOR(int64_t, y_ends_copy); + + int64_t child_cursors[5]; + child_cursors[4] = kEmptyRange; + + // Split processing into mini batches, in order to use small buffers on + // the stack (and in CPU cache) for intermediate vectors. + // + BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, queries.num_queries) + + // Preserve initial state, that is the upper bound on y coordinate. 
+ // It will be overwritten for each range of the frame, during tree traversal. + // + if (queries.num_x_ranges > 1) { + for (int64_t i = 0; i < batch_length; ++i) { + y_ends_copy[i] = queries.states[batch_begin + i].ends[0]; } + } + + for (int x_range_index = 0; x_range_index < queries.num_x_ranges; ++x_range_index) { + const int64_t* xbegins = queries.xbegins[x_range_index]; + const int64_t* xends = queries.xends[x_range_index]; - // Output results + // Restore the initial state for ranges after the first one. + // Every range during its processing overwrites it. // - if (opt_ids) { + if (x_range_index > 0) { for (int64_t i = 0; i < batch_length; ++i) { - uint16_t id = opt_ids[batch_begin + i]; - row_numbers[id] = temp_begins[i]; + queries.states[batch_begin + i].ends[0] = y_ends_copy[i]; + queries.states[batch_begin + i].ends[1] = MergeTree::kEmptyRange; } - } else { - for (int64_t i = 0; i < batch_length; ++i) { - row_numbers[batch_begin + i] = temp_begins[i]; + } + + if (queries.level_begin == num_levels() - 1 && num_levels() == 1) { + // Check if the entire top level node is in X range + // + for (int i = 0; i < batch_length; ++i) { + partial_results0[i] = partial_results1[i] = kEmptyRange; } + for (int64_t query_index = batch_begin; query_index < batch_begin + batch_length; + ++query_index) { + auto& state = queries.states[query_index]; + ARROW_DCHECK(state.ends[1] == kEmptyRange); + int64_t xbegin = xbegins[query_index]; + int64_t xend = xends[query_index]; + if (state.ends[0] != kEmptyRange) { + if (NodeIntersect(num_levels() - 1, state.ends[0] - 1, xbegin, xend) == + NodeSubsetType::FULL) { + partial_results0[query_index - batch_begin] = state.ends[0]; + } + } + } + queries.report_results_callback_(num_levels() - 1, batch_begin, + batch_begin + batch_length, partial_results0, + partial_results1, thread_ctx); + } + + for (int level = queries.level_begin; level > queries.level_end; --level) { + for (int64_t query_index = batch_begin; query_index < 
batch_begin + batch_length; + ++query_index) { + auto& state = queries.states[query_index]; + int64_t xbegin = xbegins[query_index]; + int64_t xend = xends[query_index]; + + // Predication: kEmptyRange is replaced with special constants, + // which are always a valid input, in order to avoid conditional + // branches. + // + // We will later correct values returned by called functions for + // kEmptyRange inputs. + // + constexpr int64_t kCascadeReplacement = static_cast(1); + constexpr int64_t kIntersectReplacement = static_cast(0); + + // Use fractional cascading to traverse one level down the tree + // + for (int i = 0; i < 2; ++i) { + CascadeEnd(level, + state.ends[i] == kEmptyRange ? kCascadeReplacement : state.ends[i], + &child_cursors[2 * i + 0], &child_cursors[2 * i + 1]); + } + + // For each child node check: + // a) if it should be rejected (outside of specified range of X), + // b) if it should be included in the reported results (fully inside + // of specified range of X). + // + int node_intersects_flags = 0; + int node_inside_flags = 0; + for (int i = 0; i < 4; ++i) { + child_cursors[i] = + state.ends[i / 2] == kEmptyRange ? kEmptyRange : child_cursors[i]; + auto intersection = + NodeIntersect(level - 1, + child_cursors[i] == kEmptyRange ? kIntersectReplacement + : child_cursors[i] - 1, + xbegin, xend); + intersection = + child_cursors[i] == kEmptyRange ? NodeSubsetType::EMPTY : intersection; + node_intersects_flags |= (intersection == NodeSubsetType::PARTIAL ? 1 : 0) << i; + node_inside_flags |= (intersection == NodeSubsetType::FULL ? 1 : 0) << i; + } + + // We shouldn't have more than two bits set in each intersection bit + // masks. + // + ARROW_DCHECK(ARROW_POPCOUNT64(node_intersects_flags) <= 2); + ARROW_DCHECK(ARROW_POPCOUNT64(node_inside_flags) <= 2); + + // Shuffle generated child node cursors based on X range + // intersection results. 
+ // + static constexpr uint8_t kNil = 4; + uint8_t source_shuffle_index[16][2] = { + {kNil, kNil}, {0, kNil}, {1, kNil}, {0, 1}, + {2, kNil}, {0, 2}, {1, 2}, {kNil, kNil}, + {3, kNil}, {0, 3}, {1, 3}, {kNil, kNil}, + {2, 3}, {kNil, kNil}, {kNil, kNil}, {kNil, kNil}}; + state.ends[0] = child_cursors[source_shuffle_index[node_intersects_flags][0]]; + state.ends[1] = child_cursors[source_shuffle_index[node_intersects_flags][1]]; + partial_results0[query_index - batch_begin] = + child_cursors[source_shuffle_index[node_inside_flags][0]]; + partial_results1[query_index - batch_begin] = + child_cursors[source_shuffle_index[node_inside_flags][1]]; + } + + // Report partial query results. + // + queries.report_results_callback_(level - 1, batch_begin, batch_begin + batch_length, + partial_results0, partial_results1, thread_ctx); } } + + END_MINI_BATCH_FOR } -void MergeTree::GenBitvec( - /* level to generate for */ int level, const int64_t* permutation) { - uint64_t result = 0ULL; - for (int64_t base = 0; base < num_rows_; base += 64) { - for (int64_t i = base; i < std::min(base + 64, num_rows_); ++i) { - int64_t bit = (permutation[i] >> (level - 1)) & 1; - result |= static_cast(bit) << (i & 63); +void MergeTree::BoxCountQuery(int64_t num_queries, int num_x_ranges_per_query, + const int64_t** x_begins, const int64_t** x_ends, + const int64_t* y_ends, int64_t* results, + ThreadContext& thread_context) { + // Callback function that updates the final count based on node prefixes + // representing subsets that satisfy query constraints. + // + // There is one callback call per batch per level. + // + auto callback = [results](int level, int64_t batch_begin, int64_t batch_end, + const int64_t* partial_results0, + const int64_t* partial_results1, + ThreadContext& thread_context) { + // Mask used to separate node offset from the offset within the node at + // the current level. 
+ // + int64_t mask = (1LL << level) - 1LL; + for (int64_t query_index = batch_begin; query_index < batch_end; ++query_index) { + int64_t partial_result0 = partial_results0[query_index]; + int64_t partial_result1 = partial_results1[query_index]; + + // We may have between 0 and 2 node prefixes that satisfy query + // constraints for each query. + // + // To find out their number we need to check if each of the two reported + // indices is equal to kEmptyRange. + // + // For a valid node prefix, the index reported represents the position + // one after the last element in the node prefix. + // + if (partial_result0 != kEmptyRange) { + results[query_index] += ((partial_result0 - 1) & mask) + 1; + } + if (partial_result1 != kEmptyRange) { + results[query_index] += ((partial_result1 - 1) & mask) + 1; + } } - level_bitvecs_[level][base / 64] = result; - result = 0ULL; + }; + + auto temp_vector_stack = thread_context.temp_vector_stack; + TEMP_VECTOR(MergeTree::BoxQueryState, states); + + BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, num_queries) + + // Populate BoxQueryRequest structure. + // + MergeTree::BoxQueryRequest request; + request.report_results_callback_ = callback; + request.num_queries = batch_length; + request.num_x_ranges = num_x_ranges_per_query; + for (int range_index = 0; range_index < num_x_ranges_per_query; ++range_index) { + request.xbegins[range_index] = x_begins[range_index] + batch_begin; + request.xends[range_index] = x_ends[range_index] + batch_begin; + } + request.level_begin = num_levels() - 1; + request.level_end = 0; + request.states = states; + for (int64_t i = 0; i < num_queries; ++i) { + int64_t y_end = y_ends[batch_begin + i]; + states[i].ends[0] = (y_end == 0) ? 
MergeTree::kEmptyRange : y_end; + states[i].ends[1] = MergeTree::kEmptyRange; } - BitVectorNavigator::GenPopCounts(num_rows_, level_bitvecs_[level].data(), - level_popcounts_[level].data()); + BoxQuery(request, thread_context); + + END_MINI_BATCH_FOR } -void MergeTree::Cascade(int level, int64_t pos, RangeQueryState* result) const { - ARROW_DCHECK(level > 0); +bool MergeTree::NOutOfBounds(const NthQueryRequest& queries, int64_t query_index) { + int64_t num_elements = 0; + for (int y_range_index = 0; y_range_index < queries.num_y_ranges; ++y_range_index) { + int64_t ybegin = queries.ybegins[y_range_index][query_index]; + int64_t yend = queries.yends[y_range_index][query_index]; + num_elements += yend - ybegin; + } + int64_t N = queries.states[query_index].pos; + return N < 0 || N >= num_elements; +} - int64_t node; - int64_t length; - RangeQueryState::NodeAndLengthFromPos(level, pos, &node, &length); +void MergeTree::NthQuery(const NthQueryRequest& queries, ThreadContext& thread_ctx) { + ARROW_DCHECK(queries.num_y_ranges >= 1 && queries.num_y_ranges <= 3); - int64_t node_begin = node << level; - // We use RankNext for node_begin + length - 1 instead of Rank for node_begin - // + length, because the latter one may be equal to num_rows_ which is an - // index out of range for bitvector. + auto temp_vector_stack = thread_ctx.temp_vector_stack; // For TEMP_VECTOR + TEMP_VECTOR(int64_t, pos); + TEMP_VECTOR(int64_t, ybegins0); + TEMP_VECTOR(int64_t, yends0); + TEMP_VECTOR(int64_t, ybegins1); + TEMP_VECTOR(int64_t, yends1); + TEMP_VECTOR(int64_t, ybegins2); + TEMP_VECTOR(int64_t, yends2); + int64_t* ybegins[3]; + int64_t* yends[3]; + ybegins[0] = ybegins0; + ybegins[1] = ybegins1; + ybegins[2] = ybegins2; + yends[0] = yends0; + yends[1] = yends1; + yends[2] = yends2; + + // Split processing into mini batches, in order to use small buffers on + // the stack (and in CPU cache) for intermediate vectors. 
// - int64_t rank = - BitVectorNavigator::RankNext(node_begin + length - 1, level_bitvecs_[level].data(), - level_popcounts_[level].data()); - int64_t local_rank = rank - (node_begin / 2); - result->pos[0] = - RangeQueryState::PosFromNodeAndLength(level - 1, node * 2, length - local_rank); - bool has_right_child = (node_begin + (1LL << (level - 1))) < num_rows_; - result->pos[1] = has_right_child ? RangeQueryState::PosFromNodeAndLength( - level - 1, node * 2 + 1, local_rank) - : RangeQueryState::kEmpty; -} + BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, queries.num_queries) -void MergeTree::Cascade_Begin(int level, int64_t begin, int64_t* lbegin, - int64_t* rbegin) const { - if (begin == num_rows_ || begin == RangeQueryState::kEmpty) { - *lbegin = *rbegin = RangeQueryState::kEmpty; + // Filter out queries with N out of bounds. + // + int64_t num_batch_queries = 0; + for (int64_t batch_query_index = 0; batch_query_index < batch_length; + ++batch_query_index) { + int64_t query_index = batch_begin + batch_query_index; + for (int y_range_index = 0; y_range_index < queries.num_y_ranges; ++y_range_index) { + int64_t ybegin = queries.ybegins[y_range_index][query_index]; + int64_t yend = queries.yends[y_range_index][query_index]; + // Set range boundaries to kEmptyRange for all empty ranges in + // queries. + // + ybegin = (yend == ybegin) ? kEmptyRange : ybegin; + yend = (yend == ybegin) ? kEmptyRange : yend; + ybegins[y_range_index][num_batch_queries] = ybegin; + yends[y_range_index][num_batch_queries] = yend; + } + pos[num_batch_queries] = queries.states[query_index].pos; + num_batch_queries += NOutOfBounds(queries, query_index) ? 
0 : 1; } - ARROW_DCHECK(begin >= 0 && begin < num_rows_); - int64_t node_begin_mask = ~((static_cast(1) << level) - 1); - int64_t half_node_begin = (begin & node_begin_mask) / 2; - int64_t half_node_length = (1LL << (level - 1)); - int64_t rank = BitVectorNavigator::Rank(begin, level_bitvecs_[level].data(), - level_popcounts_[level].data()); - *lbegin = (begin - rank) + half_node_begin; - *rbegin = rank + half_node_begin + half_node_length; - - int64_t lnode_end = half_node_begin * 2 + half_node_length; - int64_t rnode_end = lnode_end + half_node_length; - if (*lbegin >= lnode_end) { - *lbegin = RangeQueryState::kEmpty; + + for (int level = num_levels() - 1; level > 0; --level) { + // For all batch queries post filtering + // + for (int64_t batch_query_index = 0; batch_query_index < num_batch_queries; + ++batch_query_index) { + // Predication: kEmptyRange is replaced with special constants, which + // are always a valid input, in order to avoid conditional branches. + // + // We will later correct values returned by called functions for + // kEmptyRange inputs. + // + constexpr int64_t kBeginReplacement = static_cast(0); + constexpr int64_t kEndReplacement = static_cast(1); + int64_t ybegin[6]; + int64_t yend[6]; + int64_t num_elements_in_left = 0; + for (int y_range_index = 0; y_range_index < queries.num_y_ranges; ++y_range_index) { + int64_t ybegin_parent = ybegins[y_range_index][batch_query_index]; + int64_t yend_parent = yends[y_range_index][batch_query_index]; + + // Use fractional cascading to map range of elements in parent node + // to corresponding ranges of elements in two child nodes. + // + CascadeBegin(level, + ybegin_parent == kEmptyRange ? kBeginReplacement : ybegin_parent, + &ybegin[y_range_index], &ybegin[3 + y_range_index]); + CascadeEnd(level, yend_parent == kEmptyRange ? 
kEndReplacement : yend_parent, + ¥d[y_range_index], ¥d[y_range_index + 3]); + + // Check if any of the resulting ranges in child nodes is empty and + // update boundaries accordingly. + // + bool empty_parent = ybegin_parent == kEmptyRange || yend_parent == kEmptyRange; + for (int i = 0; i < 2; ++i) { + int child_range_index = y_range_index + 3 * i; + bool empty_range = ybegin[child_range_index] == kEmptyRange || + yend[child_range_index] == kEmptyRange || + ybegin[child_range_index] == yend[child_range_index]; + ybegin[child_range_index] = + (empty_parent || empty_range) ? kEmptyRange : ybegin[child_range_index]; + yend[child_range_index] = + (empty_parent || empty_range) ? kEmptyRange : yend[child_range_index]; + } + + // Update the number of elements in all ranges in left child. + // + num_elements_in_left += yend[y_range_index] - ybegin[y_range_index]; + } + + // Decide whether to traverse down to the left or to the right child. + // + int64_t N = pos[batch_query_index] - NodeBegin(level, pos[batch_query_index]); + int child_index = N < num_elements_in_left ? 0 : 1; + + // Update range boundaries for the selected child node. + // + for (int y_range_index = 0; y_range_index < queries.num_y_ranges; ++y_range_index) { + ybegins[y_range_index][batch_query_index] = + ybegin[y_range_index + 3 * child_index]; + yends[y_range_index][batch_query_index] = ybegin[y_range_index + 3 * child_index]; + } + + // Update node index and N for the selected child node. + // + int64_t child_node_length = 1LL << (level - 1); + pos[batch_query_index] += + child_node_length * child_index - num_elements_in_left * child_index; + } } - if (*rbegin >= rnode_end || *rbegin >= num_rows_) { - *rbegin = RangeQueryState::kEmpty; + + // Expand results of filtered batch queries to update the array of all + // query results and fill the remaining query results in this batch with + // kOutOfBounds constant. 
+ // + num_batch_queries = 0; + for (int64_t batch_query_index = 0; batch_query_index < batch_length; + ++batch_query_index) { + int64_t query_index = batch_begin + batch_query_index; + int valid_query = NOutOfBounds(queries, query_index) ? 0 : 1; + queries.states[query_index].pos = valid_query ? pos[num_batch_queries] : kOutOfBounds; + num_batch_queries += valid_query; } + + END_MINI_BATCH_FOR } -void MergeTree::Cascade_End(int level, int64_t end, int64_t* lend, int64_t* rend) const { - if (end == 0 || end == RangeQueryState::kEmpty) { - *lend = *rend = RangeQueryState::kEmpty; +void MergeTree::DebugPrintToFile(const char* filename) const { + FILE* fout; +#if defined(_MSC_VER) && _MSC_VER >= 1400 + fopen_s(&fout, filename, "wt"); +#else + fout = fopen(filename, "wt"); +#endif + if (!fout) { return; } - ARROW_DCHECK(end > 0 && end <= num_rows_); - int64_t node_begin_mask = ~((static_cast(1) << level) - 1); - int64_t half_node_begin = ((end - 1) & node_begin_mask) / 2; - int64_t half_node_length = (1LL << (level - 1)); - int64_t rank = BitVectorNavigator::RankNext(end - 1, level_bitvecs_[level].data(), - level_popcounts_[level].data()); - *lend = (end - rank) + half_node_begin; - *rend = rank + half_node_begin + half_node_length; - - int64_t lnode_begin = half_node_begin * 2; - int64_t rnode_begin = lnode_begin + half_node_length; - if (*lend == lnode_begin) { - *lend = RangeQueryState::kEmpty; - } - if (*rend == rnode_begin) { - *rend = RangeQueryState::kEmpty; + + for (int level = num_levels() - 1; level > 0; --level) { + for (int64_t i = 0; i < length_; ++i) { + fprintf(fout, "%s", bit_matrix_.GetRow(level).GetBit(i) ? 
"1" : "0"); + } + fprintf(fout, "\n"); } -} -int64_t MergeTree::Cascade_Pos(int level, int64_t pos) const { - ARROW_DCHECK(pos >= 0 && pos < num_rows_); - int64_t node_begin_mask = ~((static_cast(1) << level) - 1); - int64_t half_node_begin = (pos & node_begin_mask) / 2; - int64_t half_node_length = (1LL << (level - 1)); - int64_t rank = BitVectorNavigator::Rank(pos, level_bitvecs_[level].data(), - level_popcounts_[level].data()); - bool rchild = bit_util::GetBit( - reinterpret_cast(level_bitvecs_[level].data()), pos); - return rchild ? (rank + half_node_begin + half_node_length) - : (pos - rank + half_node_begin); -} + fprintf(fout, "\n"); -bool MergeTree::NodeFullyInsideRange(int level, int64_t node, int64_t begin, - int64_t end) const { - int64_t node_begin = node << level; - int64_t node_end = - std::min(num_rows_, node_begin + (static_cast(1LL) << level)); - return node_begin >= begin && node_end <= end; -} + for (int level = num_levels() - 1; level > 0; --level) { + auto bits = bit_matrix_.GetRow(level); + bits.DebugPrintCountersToFile(fout); + } -bool MergeTree::NodePartiallyInsideRange(int level, int64_t node, int64_t begin, - int64_t end) const { - int64_t node_begin = node << level; - int64_t node_end = - std::min(num_rows_, node_begin + (static_cast(1LL) << level)); - return node_begin < end && node_end > begin; + fclose(fout); } } // namespace compute diff --git a/cpp/src/arrow/compute/exec/window_functions/merge_tree.h b/cpp/src/arrow/compute/exec/window_functions/merge_tree.h index e6c7b48c790..3cec34fca5c 100644 --- a/cpp/src/arrow/compute/exec/window_functions/merge_tree.h +++ b/cpp/src/arrow/compute/exec/window_functions/merge_tree.h @@ -18,320 +18,288 @@ #pragma once #include +#include #include "arrow/compute/exec/util.h" +#include "arrow/compute/exec/window_functions/bit_vector_navigator.h" +#include "arrow/compute/exec/window_functions/window_frame.h" #include "arrow/util/bit_util.h" -#include "bit_vector_navigator.h" namespace arrow { namespace 
compute { -// TODO: Support multiple [begin, end) ranges in range and nth_element queries. +// Represents a fixed set of 2D points with attributes X and Y. +// Values of each attribute across points are unique integers in the range +// [0, N - 1] for N points. +// Supports two kinds of queries: +// a) Nth element +// b) Box count / box filter // - -// One way to think about MergeTree is that, when we traverse top down, we -// switch to sortedness on X axis, and when we traverse bottom up, we switch to -// sortedness on Y axis. At the lowest level of MergeTree rows are sorted on X -// and the highest level they are sorted on Y. +// Nth element query: filter points using range predicate on Y, return the nth +// smallest X within the remaining points. +// +// Box count query: filter points using range predicate on X and less than +// predicate on Y, count and return the number of remaining points. // class MergeTree { public: - MergeTree() : num_rows_(0) {} - - void Build(int64_t num_rows, const int64_t* permutation, int num_levels_to_skip, - int64_t hardware_flags, util::TempVectorStack* temp_vector_stack); - - int get_height() const { return num_rows_ ? 
1 + arrow::bit_util::Log2(num_rows_) : 0; } - - template - void Split( - /* upper level */ int level, const S* in, S* out, int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) const { - int64_t lower_node_length = 1LL << (level - 1); - int64_t lower_node_mask = lower_node_length - 1LL; - - int64_t batch_length_max = util::MiniBatch::kMiniBatchLength; - int num_ids; - auto ids_buf = util::TempVectorHolder( - temp_vector_stack, static_cast(batch_length_max)); - uint16_t* ids = ids_buf.mutable_data(); - - // Break into mini-batches - int64_t rank_batch_begin[2]; - rank_batch_begin[0] = 0; - rank_batch_begin[1] = 0; - for (int64_t batch_begin = 0; batch_begin < num_rows_; - batch_begin += batch_length_max) { - int64_t batch_length = std::min(num_rows_ - batch_begin, batch_length_max); - - for (int child = 0; child <= 1; ++child) { - // Get parent node positions (relative to the batch) for all elements - // coming from left child - util::bit_util::bits_to_indexes( - child, hardware_flags, static_cast(batch_length), - reinterpret_cast(level_bitvecs_[level].data() + - batch_begin / 64), - &num_ids, ids); - - for (int i = 0; i < num_ids; ++i) { - int64_t upper_pos = batch_begin + ids[i]; - int64_t rank = rank_batch_begin[child] + i; - int64_t lower_pos = (rank & ~lower_node_mask) * 2 + child * lower_node_length + - (rank & lower_node_mask); - out[lower_pos] = in[upper_pos]; - } - rank_batch_begin[child] += num_ids; - } - } - } - - // State or output for range query. - // - // Represents between zero and two different nodes from a single level of the - // tree. + // Constant used in description of boundaries of the ranges of node elements + // to indicate an empty range. // - // For each node remembers the length of its prefix, which represents a - // subrange of selected elements of that node. + static constexpr int64_t kEmptyRange = -1; + + // Constant returned from nth element query when the result is outside of the + // input range of elements. 
// - // Length is between 1 and the number of node elements at this level (both - // bounds inclusive), because empty set of selected elements is represented by - // a special constant kEmpty. + static constexpr int64_t kOutOfBounds = -1; + + int num_levels() const { return bit_util::Log2(length_) + 1; } + + Status Build(int64_t length, int level_begin, int64_t* permutation_of_X, + ParallelForStream& parallel_fors); + + // Internal state of a single box count / box filter query preserved between + // visiting different levels of the merge tree. // - struct RangeQueryState { - static constexpr int64_t kEmpty = ~static_cast(0); - - static int64_t PosFromNodeAndLength(int level, int64_t node, int64_t length) { - if (length == 0) { - return kEmpty; - } - return (node << level) + length - 1; - } - - static void NodeAndLengthFromPos(int level, int64_t pos, int64_t* node, - int64_t* length) { - ARROW_DCHECK(pos != kEmpty); - *node = pos >> level; - *length = 1 + pos - (*node << level); - } - - void AppendPos(int64_t new_pos) { - // One of the two positions must be set to null - // - if (pos[0] == kEmpty) { - pos[0] = new_pos; - } else { - ARROW_DCHECK(pos[1] == kEmpty); - pos[1] = new_pos; - } - } - - int64_t pos[2]; + struct BoxQueryState { + // End positions for ranges of elements sorted on Y belonging to up + // to two nodes from a single level that are active for this box query. + // + // There may be between 0 and 2 nodes represented in this state. + // If it is less than 2 we mark the remaining elements in the ends array + // with the kEmptyRange constant. + // + int64_t ends[2]; }; - // Visiting each level updates state cursor pair and outputs state cursor - // pair. 
+ // Input and mutable state for a series of box queries // - void RangeQueryStep(int level, int64_t num_queries, const int64_t* begins, - const int64_t* ends, RangeQueryState* query_states, - RangeQueryState* query_outputs) const; + struct BoxQueryRequest { + // Callback for reporting partial query results for a batch of queries and a + // single level. + // + // The arguments are: + // - tree level, + // - range of query indices (begin and end), + // - two arrays with one element per query in a batch containing two + // cursors. Each cursor represents a prefix of elements (sorted on Y) inside + // a single node from the specified level that satisfy the query. Each + // cursor can be set to kEmptyRange constant, which indicates empty result + // set. + // + using BoxQueryCallback = std::function; + BoxQueryCallback report_results_callback_; + // Number of queries + // + int64_t num_queries; + // The predicate on X can represent a union of multiple ranges, + // but all queries need to use exactly the same number of ranges. + // + int num_x_ranges; + // Range predicates on X. + // + // Since every query can use multiple ranges it is an array of arrays. + // + // Beginnings and ends of corresponding ranges are stored in separate arrays + // of arrays. + // + const int64_t* xbegins[WindowFrames::kMaxRangesInFrame]; + const int64_t* xends[WindowFrames::kMaxRangesInFrame]; + // Range of tree levels to traverse. + // + // If the range does not represent the entire tree, then only part of + // the tree will be processed, starting from the query states provided in + // the array below. The array of query states will be updated afterwards, + // allowing subsequent call to continue processing for the remaining tree + // levels. + // + int level_begin; + int level_end; + // Query state is a pair of cursors pointing to two locations in two nodes + // in a single (level_begin) level of the tree. 
A cursor can be seen as a + // prefix of elements (sorted on Y) that belongs to a single node. The + // number of cursors may be less than 2, in which case one or two cursors + // are set to the kEmptyRange constant. + // + // Initially the first cursor should be set to exclusive upper bound on Y + // (kEmptyRange if 0) and the second cursor to kEmptyRange. + // + // If we split query processing into multiple steps (level_end > 0), then + // the state will be updated. + // + BoxQueryState* states; + }; - int64_t NthElement(int64_t begin, int64_t end, int64_t n) const { - ARROW_DCHECK(n >= 0 && n < end - begin); - int64_t temp_begin = begin; - int64_t temp_end = end; - int64_t temp_n = n; + void BoxQuery(const BoxQueryRequest& queries, ThreadContext& thread_ctx); + + void BoxCountQuery(int64_t num_queries, int num_x_ranges_per_query, + const int64_t** x_begins, const int64_t** x_ends, + const int64_t* y_ends, int64_t* results, + ThreadContext& thread_context); + + // Internal state of a single nth element query preserved between visiting + // different levels of the merge tree. + struct NthQueryState { + // Position within a single node from a single level that encodes: + // - the node from which the search will continue, + // - the relative position of the output X within the sorted sequence of X + // of points associated with this node. + int64_t pos; + }; - // Traverse the tree top-down + // Input and mutable state for a series of nth element queries + // + struct NthQueryRequest { + int64_t num_queries; + // Range predicates on Y. + // + // Since every query can use multiple ranges it is an array of arrays. + // + // Beginnings and ends of corresponding ranges are stored in separate arrays + // of arrays. + // + int num_y_ranges; + const int64_t** ybegins; + const int64_t** yends; + // State encodes a node (all states will point to nodes from the same level) + // and the N for the Nth element we are looking for. 
// - int top_level = static_cast(level_bitvecs_.size()) - 1; - for (int level = top_level; level > 0; --level) { - NthElementStep(level, &temp_begin, &temp_end, &temp_n); - } - - return temp_begin; - } - - void NthElement(int64_t num_queries, const uint16_t* opt_ids, const int64_t* begins, - const int64_t* ends, - /* ns[i] must be in the range [0; ends[i] - begins[i]) */ - const int64_t* ns, int64_t* row_numbers, - util::TempVectorStack* temp_vector_stack) const; - - const uint64_t* GetLevelBitvec(int level) const { return level_bitvecs_[level].data(); } - - void Cascade_Begin(int level, int64_t begin, int64_t* lbegin, int64_t* rbegin) const; - void Cascade_End(int level, int64_t end, int64_t* lend, int64_t* rend) const; - int64_t Cascade_Pos(int level, int64_t pos) const; - - static constexpr int64_t kEmptyRangeBoundary = static_cast(~0ULL); - - int64_t GetNodeBeginFromEnd(int level, int64_t end) const { - return ((end - 1) >> level) << level; - } - int64_t GetNodeEnd(int level, int64_t node_begin) const { - return std::min(num_rows_, node_begin + (static_cast(1) << level)); - } - - template - void MiniBatchRangeQuery(int64_t num_queries, const int64_t* x_begins, - const int64_t* x_ends, int64_t* y_ends, - util::TempVectorStack* temp_vector_stack, - T_PROCESS_OUTPUT_RANGE process_output_range) { - ARROW_DCHECK(num_queries <= util::MiniBatch::kMiniBatchLength); - - TEMP_VECTOR(int64_t, y_ends_2nd); - - auto process_node = [&](int level, int64_t iquery, int64_t y_end) { - if (y_end != kEmptyRangeBoundary) { - int64_t begin = x_begins[iquery]; - int64_t end = x_ends[iquery]; - int64_t node_begin = GetNodeBeginFromEnd(level, y_end); - if (NodeFullyInsideRange(level, node_begin >> level, begin, end)) { - process_output_range(iquery, node_begin, y_end); - } else if (NodePartiallyInsideRange(level, node_begin >> level, begin, end)) { - if (y_ends[iquery] == kEmptyRangeBoundary) { - y_ends[iquery] = y_end; - } else { - ARROW_DCHECK(y_ends_2nd[iquery] == 
kEmptyRangeBoundary); - y_ends_2nd[iquery] = y_end; - } - } - } - }; - - for (int level = get_height() - 1; level >= 0; --level) { - bool is_top_level = (level == (get_height() - 1)); - for (int64_t iquery = 0; iquery < num_queries; ++iquery) { - int64_t& y_end = y_ends[iquery]; - int64_t& y_end_2nd = y_ends_2nd[iquery]; - - int64_t y_ends_new[4]; - y_ends_new[0] = y_ends_new[1] = y_ends_new[2] = y_ends_new[3] = - kEmptyRangeBoundary; - - if (is_top_level) { - y_ends_new[0] = y_end; - } else { - if (y_end != kEmptyRangeBoundary) { - Cascade_End(level + 1, y_end, &y_ends_new[0], &y_ends_new[1]); - } - if (y_ends_2nd[iquery] != kEmptyRangeBoundary) { - Cascade_End(level + 1, y_end_2nd, &y_ends_new[2], &y_ends_new[3]); - } - } - - y_end = y_end_2nd = kEmptyRangeBoundary; - for (int i = 0; i < 4; ++i) { - process_node(level, iquery, y_ends_new[i]); - } - } - } - } - - void BoxCount(int num_levels_to_skip, int num_ids, uint16_t* ids, const int64_t* begins, - const int64_t* ends, int64_t* lpos, int64_t* rpos, - int64_t* counters) const { - ARROW_DCHECK(num_rows_ > 0); - if (num_rows_ == 1) { - for (int i = 0; i < num_ids; ++i) { - uint16_t id = ids[i]; - ARROW_DCHECK(ends[id] > begins[id] && lpos[id] != RangeQueryState::kEmpty && - lpos[id] > 0); - counters[id] = num_rows_; - } - } - for (int level = get_height() - 1 - num_levels_to_skip; level >= 0; --level) { - int num_ids_new = 0; - for (int64_t iquery = 0; iquery < num_ids; ++iquery) { - uint16_t id = ids[iquery]; - int64_t begin = begins[id]; - int64_t end = ends[id]; - ARROW_DCHECK(end > begin); - int64_t lpos_new, rpos_new; - if (level == get_height() - 1 - num_levels_to_skip) { - lpos_new = lpos[id]; - rpos_new = rpos[id]; - ARROW_DCHECK(lpos_new != RangeQueryState::kEmpty && - rpos_new == RangeQueryState::kEmpty); - int64_t node_begin = (((lpos_new - 1) >> level) << level); - int64_t node_end = - std::min(num_rows_, node_begin + (static_cast(1) << level)); - ARROW_DCHECK(begin >= node_begin && end < node_end); - 
if (begin == node_begin && end == node_end) { - counters[id] += lpos_new; - lpos_new = RangeQueryState::kEmpty; - } - } else { - int64_t pos_new[4]; - pos_new[0] = pos_new[1] = pos_new[2] = pos_new[3] = RangeQueryState::kEmpty; - if (lpos[id] != RangeQueryState::kEmpty) { - Cascade_End(level + 1, lpos[id], &pos_new[0], &pos_new[1]); - } - if (rpos[id] != RangeQueryState::kEmpty) { - Cascade_End(level + 1, rpos[id], &pos_new[2], &pos_new[3]); - } - for (int i = 0; i < 4; ++i) { - if (pos_new[i] != RangeQueryState::kEmpty) { - int64_t node_begin = (((pos_new[i] - 1) >> level) << level); - int64_t node_end = - std::min(num_rows_, node_begin + (static_cast(1) << level)); - if (begin <= node_begin && end >= node_end) { - counters[id] += (pos_new[i] - node_begin); - } else if (end > node_begin && begin < node_end) { - if (lpos_new == RangeQueryState::kEmpty) { - lpos_new = pos_new[i]; - } else { - ARROW_DCHECK(rpos_new == RangeQueryState::kEmpty); - rpos_new = pos_new[i]; - } - } - } - } - } - lpos[id] = lpos_new; - rpos[id] = rpos_new; - if (lpos_new != RangeQueryState::kEmpty) { - ids[num_ids_new++] = id; - } - } - num_ids = num_ids_new; - } - } + // When the query starts it is set directly to N in the query (N part is the + // input and node part is zero). + // + // When the query finishes it is set to the query result - a value of X that + // is Nth in the given range of Y (node part is the result and N part is + // zero). 
+ // + NthQueryState* states; + }; + + void NthQuery(const NthQueryRequest& queries, ThreadContext& thread_ctx); private: - /* output 0 if value comes from left child and 1 otherwise */ - void GenBitvec( - /* level to generate for */ int level, - /* source permutation of rows for elements in this level */ - const int64_t* permutation); - - void Cascade(int level, int64_t pos, RangeQueryState* result) const; - - bool NodeFullyInsideRange(int level, int64_t node, int64_t begin, int64_t end) const; - - bool NodePartiallyInsideRange(int level, int64_t node, int64_t begin, - int64_t end) const; - - void NthElementStep(int level, int64_t* begin, int64_t* end, int64_t* n) const { - int64_t node_length = 1LL << level; - uint64_t node_mask = node_length - 1; - int64_t node_begin = (*begin & ~node_mask); - - int64_t rank_begin = BitVectorNavigator::Rank(*begin, level_bitvecs_[level].data(), - level_popcounts_[level].data()); - int64_t rank_end = BitVectorNavigator::RankNext( - *end - 1, level_bitvecs_[level].data(), level_popcounts_[level].data()); - int64_t length_left = (*end - *begin) - (rank_end - rank_begin); - int64_t child_mask = (length_left <= *n ? ~0LL : 0LL); - - *begin = node_begin + ((node_length / 2 + rank_begin - node_begin / 2) & child_mask) + - (((*begin - node_begin) - (rank_begin - node_begin / 2)) & ~child_mask); - *end = *begin + ((rank_end - rank_begin) & child_mask) + (length_left & ~child_mask); - *n -= (length_left & child_mask); - } - - int64_t num_rows_; - std::vector> level_bitvecs_; - std::vector> level_popcounts_; + // Return true if the given array of N elements contains a permutation of + // integers from [0, N - 1] range. + // + bool IsPermutation(int64_t length, const int64_t* values); + + // Find the beginning (index in the split bit vector) of the merge tree node + // for a given position within the range of bits for that node. 
+ // + inline int64_t NodeBegin(int level, int64_t pos) const; + + // Find the end (index one after the last) of the merge tree node given a + // position within its range. + // + // All nodes of the level have (1 << level) elements except for the last that + // can be truncated. + // + inline int64_t NodeEnd(int level, int64_t pos) const; + + // Use split bit vector and bit vector navigator to map beginning of a + // range of Y from a parent node to both child nodes. + // + // If the child range is empty return kEmptyRange for it. + // + inline void CascadeBegin(int from_level, int64_t begin, int64_t* lbegin, + int64_t* rbegin) const; + + // Same as CascadeBegin but for the end (one after the last element) of the + // range. + // + // The difference is that end offset within the node can have values in + // [1; S] range, where S is the size of the node, while the beginning offset + // is in [0; S - 1]. + // + inline void CascadeEnd(int from_level, int64_t end, int64_t* lend, int64_t* rend) const; + + // Fractional cascading for a single element of a parent node. + // + inline int64_t CascadePos(int from_level, int64_t pos) const; + + enum class NodeSubsetType { EMPTY, PARTIAL, FULL }; + + // Check whether the intersection with respect to X axis of the range + // represented by the node and a given range is: a) empty, b) full node, c) + // partial node. + // + inline NodeSubsetType NodeIntersect(int level, int64_t pos, int64_t begin, int64_t end); + + // Split a subset of elements from the source level. + // + // When MULTIPLE_SOURCE_NODES == false, + // then the subset must be contained in a single source node (it can also + // represent the entire source node). 
+ // + template + void SplitSubsetImp(const BitWeaverNavigator& split_bits, int source_level, + const T* source_level_vector, T* target_level_vector, + int64_t read_begin, int64_t read_end, int64_t write_begin_bit0, + int64_t write_begin_bit1, ThreadContext& thread_ctx); + + // Split a subset of elements from the source level. + // + template + void SplitSubset(int source_level, const T* source_level_vector, T* target_level_vector, + int64_t read_begin, int64_t read_end, ThreadContext& thread_ctx); + + void SetMorselLoglen(int morsel_loglen); + + // Load up to 64 bits from interleaved bit vector starting at an arbitrary bit + // index. + // + inline uint64_t GetWordUnaligned(const BitWeaverNavigator& source, int64_t bit_index, + int num_bits = 64); + + // Set a subsequence of bits within a single word inside an interleaved bit + // vector. + // + inline void UpdateWord(BitWeaverNavigator& target, int64_t bit_index, int num_bits, + uint64_t bits); + + // Copy bits while reading and writing aligned 64-bit words only. + // + // Input and output bit vectors may be logical bit vectors inside a + // collection of interleaved bit vectors of the same length (accessed + // using BitWeaverNavigator). 
+ // + void BitMemcpy(const BitWeaverNavigator& source, BitWeaverNavigator& target, + int64_t source_begin, int64_t source_end, int64_t target_begin); + + void GetChildrenBoundaries(const BitWeaverNavigator& split_bits, + int64_t num_source_nodes, int64_t* source_node_begins, + int64_t* target_node_begins); + + void BuildUpperSliceMorsel(int level_begin, int64_t* permutation_of_X, + int64_t* temp_permutation_of_X, int64_t morsel_index, + ThreadContext& thread_ctx); + + void CombineUpperSlicesMorsel(int level_begin, int64_t output_morsel, + int64_t* input_permutation_of_X, + int64_t* output_permutation_of_X, + ThreadContext& thread_ctx); + + void BuildLower(int level_begin, int64_t morsel_index, int64_t* begin_permutation_of_X, + int64_t* temp_permutation_of_X, ThreadContext& thread_ctx); + + bool NOutOfBounds(const NthQueryRequest& queries, int64_t query_index); + + void DebugPrintToFile(const char* filename) const; + + static constexpr int kBitMatrixBandSize = 4; + static constexpr int kMinMorselLoglen = BitVectorWithCounts::kLogBitsPerBlock; + + int morsel_loglen_; + int64_t length_; + + BitMatrixWithCounts bit_matrix_; + BitMatrixWithCounts bit_matrix_upper_slices_; + + // Temp buffer used while building the tree for double buffering of the + // permutation of X (buffer for upper level is used to generate buffer for + // lower level, then we traverse down and swap the buffers). + // The other buffer is provided by the caller of the build method. + // + std::vector temp_permutation_of_X_; }; } // namespace compute diff --git a/cpp/src/arrow/compute/exec/window_functions/range_tree.cc b/cpp/src/arrow/compute/exec/window_functions/range_tree.cc deleted file mode 100644 index 1268e9378ba..00000000000 --- a/cpp/src/arrow/compute/exec/window_functions/range_tree.cc +++ /dev/null @@ -1,227 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. 
See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/compute/exec/window_functions/range_tree.h" - -namespace arrow { -namespace compute { - -void RangeTree::Build(int64_t num_rows, const int64_t* x_sorted_on_z, - const int64_t* y_sorted_on_z, int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) { - num_rows_ = num_rows; - -#ifndef NDEBUG - // Check that x, y and z are permutations of [0, num_rows) - // - ARROW_DCHECK(IsPermutation(num_rows, x_sorted_on_z)); - ARROW_DCHECK(IsPermutation(num_rows, y_sorted_on_z)); -#endif - - if (num_rows <= kMinRows) { - for (int64_t i = 0; i < num_rows; ++i) { - rows_[i].x = x_sorted_on_z[i]; - rows_[i].y = y_sorted_on_z[i]; - rows_[i].z = i; - } - return; - } - - // Build x trees, trees in which nodes are split on x coordinate. - // One of them will have bit vectors organized by y coordinate (and will be - // used for remapping y values), the other one will have bit vectors - // organized by z coordinate. 
- // - xtree_on_z_.Build(num_rows_, x_sorted_on_z, 0, hardware_flags, temp_vector_stack); - { - std::vector x_sorted_on_y(num_rows_); - for (int64_t i = 0; i < num_rows_; ++i) { - int64_t x = x_sorted_on_z[i]; - int64_t y = y_sorted_on_z[i]; - x_sorted_on_y[y] = x; - } - xtree_on_y_.Build(num_rows_, x_sorted_on_y.data(), 0, hardware_flags, - temp_vector_stack); - } - - // Build y trees. There is one y tree for each node of the x tree. - // The y trees for the x tree nodes from the same level are concatenated to - // make a single x tree with missing top levels (e.g. 2nd level from the top - // will contain two x trees that concatenated will make up a single 2x - // larger x tree without its top most level). - // - int height = xtree_on_z_.get_height(); - ytrees_on_z_.resize(height); - - std::vector local_y_sorted_on_local_z[2]; - local_y_sorted_on_local_z[0].resize(num_rows); - local_y_sorted_on_local_z[1].resize(num_rows); - memcpy(local_y_sorted_on_local_z[(height - 1) & 1].data(), y_sorted_on_z, - num_rows * sizeof(int64_t)); - - for (int level = height - 1; level > 0; --level) { - int this_level = (level & 1); - int level_above = 1 - this_level; - if (level < height - 1) { - xtree_on_z_.Split(level + 1, local_y_sorted_on_local_z[level_above].data(), - local_y_sorted_on_local_z[this_level].data(), hardware_flags, - temp_vector_stack); - for (int64_t i = 0; i < num_rows; ++i) { - int64_t& local_y = local_y_sorted_on_local_z[this_level][i]; - local_y = xtree_on_y_.Cascade_Pos(level + 1, local_y); - } - } - ytrees_on_z_[level].Build(num_rows, local_y_sorted_on_local_z[this_level].data(), - /* number of top levels to skip */ (height - 1) - level, - hardware_flags, temp_vector_stack); - } -} - -void RangeTree::BoxCount(int64_t num_queries, const int64_t* x_ends, - const int64_t* y_begins, const int64_t* y_ends, - const int64_t* z_ends, int64_t* out_counts, - int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) { - if (num_rows_ <= kMinRows) { - for 
(int64_t i = 0; i < num_queries; ++i) { - out_counts[i] = 0; - for (int64_t j = 0; j < num_rows_; ++j) { - if (rows_[j].x < x_ends[i] && rows_[j].y >= y_begins[i] && - rows_[j].y < y_ends[i] && rows_[j].z < z_ends[i]) { - ++out_counts[i]; - } - } - } - return; - } - - int num_xtree_query_ids; - TEMP_VECTOR(uint16_t, xtree_query_ids); - TEMP_VECTOR(int64_t, xtree_y_begins); - TEMP_VECTOR(int64_t, xtree_y_ends); - TEMP_VECTOR(int64_t, xtree_z_ends); - - int num_ytree_query_ids; - TEMP_VECTOR(uint16_t, ytree_query_ids); - TEMP_VECTOR(int64_t, ytree_y_begins); - TEMP_VECTOR(int64_t, ytree_y_ends); - TEMP_VECTOR(int64_t, ytree_left_z_ends); - TEMP_VECTOR(int64_t, ytree_right_z_ends); - - auto add_xtree_query = [&](uint16_t id, int64_t y_begin, int64_t y_end, int64_t z_end) { - xtree_query_ids[num_xtree_query_ids++] = id; - xtree_y_begins[id] = y_begin; - xtree_y_ends[id] = y_end; - xtree_z_ends[id] = z_end; - }; - - auto add_ytree_query = [&](uint16_t id, int64_t y_begin, int64_t y_end, int64_t z_end) { - ytree_query_ids[num_ytree_query_ids] = id; - ytree_left_z_ends[id] = z_end; - ytree_right_z_ends[id] = MergeTree::RangeQueryState::kEmpty; - ytree_y_begins[id] = y_begin; - ytree_y_ends[id] = y_end; - ++num_ytree_query_ids; - }; - - auto try_query = [&](int level, int64_t batch_begin, uint16_t id, int64_t x_end, - int64_t y_begin, int64_t y_end, int64_t z_end) { - if (y_begin != MergeTree::RangeQueryState::kEmpty && - y_end != MergeTree::RangeQueryState::kEmpty && - z_end != MergeTree::RangeQueryState::kEmpty && z_end > 0) { - int64_t node_x_begin = (((z_end - 1) >> level) << level); - int64_t node_x_end = - std::min(num_rows_, node_x_begin + (static_cast(1) << level)); - if (x_end > node_x_begin && y_begin < y_end) { - if (level == 0) { - out_counts[batch_begin + id] += 1; - } else { - if (node_x_end <= x_end) { - add_xtree_query(id, y_begin, y_end, z_end); - } else if (node_x_begin < x_end) { - add_ytree_query(id, y_begin, y_end, z_end); - } - } - } - } - }; - - int 
height = xtree_on_z_.get_height(); - BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, num_rows_) - memset(out_counts + batch_begin, 0, batch_length * sizeof(int64_t)); - - for (int level = height - 1; level >= 0; --level) { - num_xtree_query_ids = 0; - num_ytree_query_ids = 0; - if (level == height - 1) { - for (int64_t i = batch_begin; i < batch_begin + batch_length; ++i) { - uint16_t id = static_cast(i - batch_begin); - int64_t x_end = x_ends[i]; - int64_t y_begin = y_begins[i]; - int64_t y_end = y_ends[i]; - int64_t z_end = z_ends[i]; - try_query(height - 1, batch_begin, id, x_end, y_begin, y_end, z_end); - } - } else { - for (int64_t i = 0; i < num_xtree_query_ids; ++i) { - uint16_t id = xtree_query_ids[i]; - int64_t x_end = x_ends[batch_begin + id]; - int64_t y_begin = xtree_y_begins[id]; - int64_t y_end = xtree_y_ends[id]; - int64_t z_end = xtree_z_ends[id]; - int64_t y_lbegin, y_rbegin; - int64_t y_lend, y_rend; - int64_t z_lend, z_rend; - xtree_on_y_.Cascade_Begin(level + 1, y_begin, &y_lbegin, &y_rbegin); - xtree_on_y_.Cascade_End(level + 1, y_end, &y_lend, &y_rend); - xtree_on_z_.Cascade_End(level + 1, z_end, &z_lend, &z_rend); - try_query(level, batch_begin, id, x_end, y_lbegin, y_lend, z_lend); - try_query(level, batch_begin, id, x_end, y_rbegin, y_rend, z_rend); - } - } - - if (level > 0) { - ytrees_on_z_[level].BoxCount( - height - 1 - level, num_ytree_query_ids, ytree_query_ids, ytree_y_begins, - ytree_y_ends, ytree_left_z_ends, ytree_right_z_ends, out_counts + batch_begin); - } - } - END_MINI_BATCH_FOR -} - -#ifndef NDEBUG -bool RangeTree::IsPermutation(int64_t num_rows, const int64_t* values) { - std::vector present(num_rows); - for (int64_t i = 0; i < num_rows; ++i) { - present[i] = false; - } - for (int64_t i = 0; i < num_rows; ++i) { - int64_t value = values[i]; - if (value >= 0 && value < num_rows) { - return false; - } - if (!present[value]) { - return false; - } - present[value] = true; - } - return true; -} -#endif - -} // namespace compute 
-} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/range_tree.h b/cpp/src/arrow/compute/exec/window_functions/range_tree.h deleted file mode 100644 index a7581ddc50c..00000000000 --- a/cpp/src/arrow/compute/exec/window_functions/range_tree.h +++ /dev/null @@ -1,67 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include "arrow/compute/exec/util.h" -#include "arrow/compute/exec/window_functions/bit_vector_navigator.h" -#include "arrow/compute/exec/window_functions/merge_tree.h" - -namespace arrow { -namespace compute { - -// All three coordinates (x, y and z) are unique integers from the range [0, -// num_rows). -// -// We also refer to local coordinates within the context of a level of a merge -// tree. Local coordinate (x, y or z) would be a result of mapping original -// coordinate by computing its rank (position in the sequence sorted on this -// coordinate) within the node of the tree from that level, plus the index of -// the first element of that node in a vector representing the level of the -// tree. 
-// -class RangeTree { - public: - void Build(int64_t num_rows, const int64_t* x_sorted_on_z, const int64_t* y_sorted_on_z, - int64_t hardware_flags, util::TempVectorStack* temp_vector_stack); - - void BoxCount(int64_t num_queries, const int64_t* x_ends, const int64_t* y_begins, - const int64_t* y_ends, const int64_t* z_ends, int64_t* out_counts, - int64_t hardware_flags, util::TempVectorStack* temp_vector_stack); - - private: -#ifndef NDEBUG - bool IsPermutation(int64_t num_rows, const int64_t* values); -#endif - - static constexpr int64_t kMinRows = 2; - - int64_t num_rows_; - struct { - int64_t x, y, z; - } rows_[kMinRows]; - // Tree splitting on x coordinate - MergeTree xtree_on_y_; // with bitvectors indexed by y coordinate - MergeTree xtree_on_z_; // with bitvectors indexed by z coordinate - // Collection of trees splitting on y coordinate (one tree for each node of - // the xtree) - std::vector ytrees_on_z_; // with bitvectors indexed by z coordinate -}; - -} // namespace compute -} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/splay_tree.cc b/cpp/src/arrow/compute/exec/window_functions/splay_tree.cc deleted file mode 100644 index 1968b13f3f4..00000000000 --- a/cpp/src/arrow/compute/exec/window_functions/splay_tree.cc +++ /dev/null @@ -1,610 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/compute/exec/window_functions/splay_tree.h" - -namespace arrow { -namespace compute { - -SplayTree::SplayTree() { Clear(); } - -void SplayTree::Insert(int64_t value) { - index_t rank; - index_t parent_id; - int parent_side; - index_t node_id; - Find(value, kCountStar, &parent_id, &parent_side, &node_id, &rank); - - if (node_id != kNilId) { - ++nodes_[node_id].value_count; - ++nodes_[node_id].subtree_count[kCountStar]; - while (parent_id != kNilId) { - NodeType& node = nodes_[parent_id]; - ++node.subtree_count[kCountStar]; - parent_id = node.parent_id; - } - -#ifndef NDEBUG - ValidateTree(); -#endif - - return; - } - - index_t new_node_id = AllocateNode(); - NodeType& new_node = nodes_[new_node_id]; - new_node.value = value; - new_node.value_count = 0; - new_node.subtree_count[0] = new_node.subtree_count[1] = 0; - new_node.parent_id = parent_id; - new_node.child_id[0] = new_node.child_id[1] = kNilId; - if (parent_id == kNilId) { - root_id_ = new_node_id; - } else { - nodes_[parent_id].child_id[parent_side] = new_node_id; - Splay(new_node_id); - } - nodes_[root_id_].value_count = 1; - for (int i = 0; i < 2; ++i) { - ++nodes_[root_id_].subtree_count[i]; - } - -#ifndef NDEBUG - ValidateTree(); -#endif -} - -void SplayTree::Remove(int64_t value) { - index_t rank; - index_t parent_id; - int parent_side; - index_t node_id; - Find(value, kCountStar, &parent_id, &parent_side, &node_id, &rank); - - // Noop if value is not present - if (node_id == kNilId) { - return; - } - - NodeType* node = &nodes_[node_id]; - - // Decrease subtree_count 
for all ancestors of the node. - // - for (index_t x = parent_id; x != kNilId; x = nodes_[x].parent_id) { - nodes_[x].subtree_count[kCountStar] -= 1; - } - --node->value_count; - --node->subtree_count[kCountStar]; - - if (node->value_count > 0) { -#ifndef NDEBUG - ValidateTree(); -#endif - - return; - } - - for (index_t x = parent_id; x != kNilId; x = nodes_[x].parent_id) { - nodes_[x].subtree_count[kCountDistinctValue] -= 1; - } - --node->subtree_count[kCountDistinctValue]; - - if (node->child_id[0] != kNilId && node->child_id[1] != kNilId) { - index_t prev_node_id = node->child_id[0]; - while (nodes_[prev_node_id].child_id[1] != kNilId) { - prev_node_id = nodes_[prev_node_id].child_id[1]; - } - NodeType& prev_node = nodes_[prev_node_id]; - for (index_t x = nodes_[prev_node_id].parent_id; x != node_id; - x = nodes_[x].parent_id) { - nodes_[x].subtree_count[kCountStar] -= prev_node.value_count; - nodes_[x].subtree_count[kCountDistinctValue] -= 1; - } - index_t prev_node_parent_id = nodes_[prev_node_id].parent_id; - if (nodes_[prev_node_parent_id].child_id[0] == prev_node_id) { - nodes_[prev_node_parent_id].child_id[0] = nodes_[prev_node_id].child_id[0]; - } else { - nodes_[prev_node_parent_id].child_id[1] = nodes_[prev_node_id].child_id[0]; - } - if (nodes_[prev_node_id].child_id[0] != kNilId) { - nodes_[nodes_[prev_node_id].child_id[0]].parent_id = prev_node_parent_id; - } - nodes_[prev_node_id].parent_id = kNilId; - - node->value = prev_node.value; - node->value_count = prev_node.value_count; - - DeallocateNode(prev_node_id); - -#ifndef NDEBUG - ValidateTree(); -#endif - - return; - } - - for (int side = 0; side < 2; ++side) { - if (node->child_id[side] == kNilId) { - nodes_[parent_id].child_id[parent_side] = node->child_id[1 - side]; - nodes_[node->child_id[1 - side]].parent_id = parent_id; - if (parent_id == kNilId) { - root_id_ = node->child_id[1 - side]; - } else { - Splay(parent_id); - } - DeallocateNode(node_id); - -#ifndef NDEBUG - ValidateTree(); -#endif 
- - return; - } - } -} - -void SplayTree::Clear() { - nodes_.clear(); - empty_slots_.clear(); - root_id_ = kNilId; - nodes_.push_back(NodeType()); - nodes_[kNilId].value_count = 0; - for (int i = 0; i < 2; ++i) { - nodes_[kNilId].subtree_count[i] = 0; - } -} - -// Value does not need to be present -int64_t SplayTree::Rank(bool ties_low, int64_t value) { - index_t rank; - index_t parent_id; - int parent_side; - index_t node_id; - Find(value, kCountStar, &parent_id, &parent_side, &node_id, &rank); - if (ties_low) { - return rank + 1; - } - return rank + nodes_[node_id].value_count; -} - -// Value does not need to be present -int64_t SplayTree::DenseRank(int64_t value) { - index_t rank; - index_t parent_id; - int parent_side; - index_t node_id; - Find(value, kCountDistinctValue, &parent_id, &parent_side, &node_id, &rank); - return rank + 1; -} - -SplayTree::index_t SplayTree::AllocateNode() { - index_t new_node_id; - if (empty_slots_.empty()) { - new_node_id = static_cast(nodes_.size()); - nodes_.push_back(NodeType()); - } else { - new_node_id = empty_slots_.back(); - empty_slots_.pop_back(); - } - return new_node_id; -} - -void SplayTree::DeallocateNode(index_t node_id) { empty_slots_.push_back(node_id); } - -void SplayTree::SwitchParent(index_t old_parent_id, int old_child_side, - index_t new_parent_id, int new_child_side) { - NodeType& old_parent = nodes_[old_parent_id]; - NodeType& new_parent = nodes_[new_parent_id]; - index_t child_id = old_parent.child_id[old_child_side]; - NodeType& child = nodes_[child_id]; - index_t replaced_child_id = new_parent.child_id[new_child_side]; - NodeType& replaced_child = nodes_[replaced_child_id]; - - // New parent cannot be a child of old parent. 
- ARROW_DCHECK(new_parent.parent_id != old_parent_id); - - child.parent_id = new_parent_id; - replaced_child.parent_id = kNilId; - new_parent.child_id[new_child_side] = child_id; - old_parent.child_id[old_child_side] = kNilId; - - for (int i = 0; i < 2; ++i) { - new_parent.subtree_count[i] += - child.subtree_count[i] - replaced_child.subtree_count[i]; - old_parent.subtree_count[i] -= child.subtree_count[i]; - } -} - -// parent node | -// / \ / \ | -// node y --> x parent | -// / \ / \ | -// x mid mid y | -void SplayTree::Zig(index_t node_id, index_t parent_id, int parent_side) { - NodeType& node = nodes_[node_id]; - NodeType& parent = nodes_[parent_id]; - - // zig is only called when parent is the root of the tree - // - ARROW_DCHECK(parent.parent_id == kNilId); - - // Rearrange tree nodes - // - SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); - - // At this point we have: - // - // nil nil | - // | | | - // node + parent | - // / \ / \ | - // x nil mid y | - // - - // Connect parent to node - // - node.child_id[1 - parent_side] = parent_id; - parent.parent_id = node_id; - for (int i = 0; i < 2; ++i) { - node.subtree_count[i] += parent.subtree_count[i]; - } - root_id_ = node_id; -} - -// grandparent node | -// / \ / \ | -// parent y x parent | -// / \ --> / \ | -// node mid1 mid0 grandparent | -// / \ / \ | -// x mid0 mid1 y | -void SplayTree::ZigZig(index_t node_id, index_t parent_id, index_t grandparent_id, - int parent_side) { - NodeType& node = nodes_[node_id]; - NodeType& parent = nodes_[parent_id]; - NodeType& grandparent = nodes_[grandparent_id]; - - // Rearrange tree nodes. - // The order of the calls below is important. 
- // - SwitchParent(parent_id, 1 - parent_side, grandparent_id, parent_side); - SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); - - // At this point we have: - // - // nil nil z | - // | | | | - // node + parent + grandparent | - // / \ / \ / \ | - // x nil mid0 nil mid1 y | - // - - node.parent_id = grandparent.parent_id; - if (node.parent_id != kNilId) { - int side = (nodes_[node.parent_id].child_id[0] == grandparent_id) ? 0 : 1; - nodes_[node.parent_id].child_id[side] = node_id; - } - - // Connect grandparent to parent - // - parent.child_id[1 - parent_side] = grandparent_id; - grandparent.parent_id = parent_id; - for (int i = 0; i < 2; ++i) { - parent.subtree_count[i] += grandparent.subtree_count[i]; - } - - // Connect parent to node - // - node.child_id[1 - parent_side] = parent_id; - parent.parent_id = node_id; - for (int i = 0; i < 2; ++i) { - node.subtree_count[i] += parent.subtree_count[i]; - } - if (root_id_ == grandparent_id) { - root_id_ = node_id; - } -} - -// grandparent node | -// / \ / \ | -// parent y parent grandparent | -// / \ --> /\ / \ | -// x node x mid0 mid1 y | -// / \ | -// mid0 mid1 | -void SplayTree::ZigZag(index_t node_id, index_t parent_id, index_t grandparent_id, - int parent_side, int grandparent_side) { - NodeType& node = nodes_[node_id]; - NodeType& parent = nodes_[parent_id]; - NodeType& grandparent = nodes_[grandparent_id]; - - // Rearrange tree nodes. - // The order of the calls below is important. 
- // - SwitchParent(node_id, parent_side, grandparent_id, 1 - parent_side); - if (grandparent.child_id[1 - parent_side] != kNilId) { - for (int i = 0; i < 2; ++i) { - parent.subtree_count[i] -= - nodes_[grandparent.child_id[1 - parent_side]].subtree_count[i]; - } - } - SwitchParent(node_id, 1 - parent_side, parent_id, parent_side); - - // At this point we have: - // - // nil nil z | - // | | | | - // node + parent + grandparent | - // / \ / \ / \ | - // nil nil x mid0 mid1 y | - // - - node.parent_id = grandparent.parent_id; - if (node.parent_id != kNilId) { - int side = (nodes_[node.parent_id].child_id[0] == grandparent_id) ? 0 : 1; - nodes_[node.parent_id].child_id[side] = node_id; - } - - // Connect parent and grandparent to node - // - node.child_id[1 - parent_side] = parent_id; - node.child_id[parent_side] = grandparent_id; - parent.parent_id = node_id; - grandparent.parent_id = node_id; - for (int i = 0; i < 2; ++i) { - node.subtree_count[i] += parent.subtree_count[i] + grandparent.subtree_count[i]; - } - if (root_id_ == grandparent_id) { - root_id_ = node_id; - } -} - -void SplayTree::Splay(index_t node_id) { - for (;;) { - NodeType& node = nodes_[node_id]; - index_t parent_id = node.parent_id; - if (parent_id == kNilId) { - break; - } - NodeType& parent = nodes_[parent_id]; - int parent_side = (parent.child_id[0] == node_id ? 0 : 1); - index_t grandparent_id = parent.parent_id; - if (grandparent_id == kNilId) { - Zig(node_id, parent_id, parent_side); - continue; - } - NodeType& grandparent = nodes_[grandparent_id]; - int grandparent_side = (grandparent.child_id[0] == parent_id ? 0 : 1); - if (parent_side == grandparent_side) { - ZigZig(node_id, parent_id, grandparent_id, parent_side); - } else { - ZigZag(node_id, parent_id, grandparent_id, parent_side, grandparent_side); - } -#ifndef NDEBUG - ValidateTree(); -#endif - } -} - -// Find the node with the given value if exists. 
-// Otherwise find the place in the tree where the new value would be -// inserted (its parent and parent's child index). -// -void SplayTree::Find(int64_t value, int counter_id, index_t* parent_id, int* parent_side, - index_t* node_id, index_t* count_less) const { - *parent_id = kNilId; - *parent_side = 0; - *count_less = 0; - - *node_id = root_id_; - for (;;) { - if (*node_id == kNilId) { - return; - } - const NodeType& node = nodes_[*node_id]; - const NodeType& left_child = nodes_[node.child_id[0]]; - if (value == node.value) { - *count_less += left_child.subtree_count[counter_id]; - return; - } - int direction = value < node.value ? 0 : 1; - if (direction == 1) { - *count_less += left_child.subtree_count[counter_id] + - (counter_id == kCountStar ? node.value_count : 1); - } - *parent_id = *node_id; - *parent_side = direction; - *node_id = node.child_id[direction]; - } -} - -void SplayTree::ValidateVisit(index_t node_id, index_t* count, index_t* count_distinct) { - ARROW_DCHECK(node_id != kNilId); - ARROW_DCHECK(nodes_[node_id].parent_id == kNilId || - nodes_[nodes_[node_id].parent_id].child_id[0] == node_id || - nodes_[nodes_[node_id].parent_id].child_id[1] == node_id); - *count = nodes_[node_id].value_count; - *count_distinct = nodes_[node_id].value_count > 0 ? 
1 : 0; - for (int side = 0; side < 2; ++side) { - if (nodes_[node_id].child_id[side] != kNilId) { - index_t count_child, count_distinct_child; - ARROW_DCHECK(nodes_[nodes_[node_id].child_id[side]].parent_id == node_id); - ValidateVisit(nodes_[node_id].child_id[side], &count_child, &count_distinct_child); - *count += count_child; - *count_distinct += count_distinct_child; - } - } - bool count_correct = (*count == nodes_[node_id].subtree_count[kCountStar]); - bool count_distinct_correct = - (*count_distinct == nodes_[node_id].subtree_count[kCountDistinctValue]); - if (!count_correct || !count_distinct_correct) { - Print(); - } - ARROW_DCHECK(count_correct); - ARROW_DCHECK(count_distinct_correct); -} - -void SplayTree::ValidateTree() { - index_t count = 0; - index_t count_distinct = 0; - if (root_id_ != kNilId) { - ValidateVisit(root_id_, &count, &count_distinct); - } - ARROW_DCHECK(nodes_.size() <= empty_slots_.size() + count_distinct + - /*extra one for kNilId*/ 1 + 1); -} - -template -int SplayTree::Print_StrLen(const T& value) { - std::string s = std::to_string(value); - return static_cast(s.length()); -} - -std::string SplayTree::Print_Label(index_t node_id) const { - const NodeType& node = nodes_[node_id]; - return std::string("(") + std::to_string(node.value) + "," + - std::to_string(node.value_count) + "," + std::to_string(node.subtree_count[0]) + - "," + std::to_string(node.subtree_count[1]) + ")"; -} - -void SplayTree::Print_BoxWH(index_t node_id, std::map& boxes) { - // Recursively compute box size for left and right child if they exist - // - bool has_child[2]; - for (int ichild = 0; ichild < 2; ++ichild) { - has_child[ichild] = (nodes_[node_id].child_id[ichild] != kNilId); - if (has_child[ichild]) { - Print_BoxWH(nodes_[node_id].child_id[ichild], boxes); - } - } - - PrintBox box; - box.x = box.y = 0; - int label_size = static_cast(Print_Label(node_id).length()); - - if (!has_child[0] && !has_child[1]) { - box.root_x = 0; - box.w = label_size; - box.h = 1; 
- } else if (has_child[0] && has_child[1]) { - // Both children - PrintBox left_box = boxes.find(nodes_[node_id].child_id[0])->second; - PrintBox right_box = boxes.find(nodes_[node_id].child_id[1])->second; - box.w = left_box.w + right_box.w + 1; - box.h = std::max(left_box.h, right_box.h) + 4; - int mid = (left_box.w + right_box.w + 1) / 2; - box.root_x = - std::min(std::max(mid, left_box.root_x), left_box.w + 1 + right_box.root_x); - box.w = std::max(box.w, box.root_x + label_size); - } else { - // One child - int ichild = (has_child[0] ? 0 : 1); - PrintBox child_box = boxes.find(nodes_[node_id].child_id[ichild])->second; - box.h = child_box.h + 4; - box.w = child_box.w; - box.root_x = box.w / 2; - box.w = std::max(box.w, box.root_x + label_size); - } - - boxes.insert(std::make_pair(node_id, box)); -} - -void SplayTree::Print_BoxXY(int x, int y, index_t node_id, - std::map& boxes) { - PrintBox& box = boxes.find(node_id)->second; - box.root_x += x; - box.x += x; - box.y += y; - bool has_child[2]; - for (int ichild = 0; ichild < 2; ++ichild) { - has_child[ichild] = (nodes_[node_id].child_id[ichild] != kNilId); - } - if (has_child[0] && has_child[1]) { - Print_BoxXY(x, y + 4, nodes_[node_id].child_id[0], boxes); - Print_BoxXY(x + boxes.find(nodes_[node_id].child_id[0])->second.w + 1, y + 4, - nodes_[node_id].child_id[1], boxes); - } else if (has_child[0] || has_child[1]) { - Print_BoxXY(x, y + 4, nodes_[node_id].child_id[has_child[0] ? 
0 : 1], boxes); - } -} - -void SplayTree::Print_PutChar(std::vector>& canvas, int x, int y, - char c) { - if (y >= static_cast(canvas.size())) { - canvas.resize(y + 1); - } - if (x >= static_cast(canvas[y].size())) { - canvas[y].resize(x + 1); - } - canvas[y][x] = c; -} - -void SplayTree::Print_PutString(std::vector>& canvas, int x, int y, - std::string str) { - for (int i = 0; i < static_cast(str.length()); ++i) { - Print_PutChar(canvas, x + i, y, str[i]); - } -} - -void SplayTree::Print_Node(index_t node_id, std::map& boxes, - std::vector>& canvas) { - PrintBox box = boxes.find(node_id)->second; - Print_PutString(canvas, box.root_x, box.y, Print_Label(node_id)); - for (int ichild = 0; ichild < 2; ++ichild) { - if (nodes_[node_id].child_id[ichild] != kNilId) { - PrintBox child_box = boxes.find(nodes_[node_id].child_id[ichild])->second; - int top_x = child_box.root_x; - int bottom_x = box.root_x + ichild; - Print_PutChar(canvas, top_x, box.y + 3, '|'); - for (int x = std::min(bottom_x, top_x); x <= std::max(bottom_x, top_x); ++x) { - Print_PutChar(canvas, x, box.y + 2, '-'); - } - Print_PutChar(canvas, bottom_x, box.y + 1, '|'); - Print_Node(nodes_[node_id].child_id[ichild], boxes, canvas); - } - } -} - -void SplayTree::Print() { - if (root_id_ == kNilId) { - return; - } - std::map boxes; - Print_BoxWH(root_id_, boxes); - Print_BoxXY(0, 0, root_id_, boxes); - std::vector> canvas; - Print_Node(root_id_, boxes, canvas); - - const char* filename = "splay_tree_output.txt"; - FILE* fout; -#if defined(_MSC_VER) && _MSC_VER >= 1400 - fopen_s(&fout, filename, "wt"); -#else - fout = fopen(filename, "wt"); -#endif - - for (size_t y = 0; y < canvas.size(); ++y) { - for (size_t x = 0; x < canvas[y].size(); ++x) { - fprintf(fout, "%c", canvas[y][x]); - } - fprintf(fout, "\n"); - } - fclose(fout); -} - -} // namespace compute -} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/splay_tree.h b/cpp/src/arrow/compute/exec/window_functions/splay_tree.h 
deleted file mode 100644 index 12b982f9f6f..00000000000 --- a/cpp/src/arrow/compute/exec/window_functions/splay_tree.h +++ /dev/null @@ -1,139 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include -#include "arrow/compute/exec/util.h" // for ARROW_DCHECK - -namespace arrow { -namespace compute { - -class SplayTree { - public: - using index_t = int32_t; - - SplayTree(); - - void Insert(int64_t value); - - void Remove(int64_t value); - - void Clear(); - - // Value does not need to be present - int64_t Rank(bool ties_low, int64_t value); - - // Value does not need to be present - int64_t DenseRank(int64_t value); - - private: - static constexpr index_t kNilId = 0; - static constexpr int kCountStar = 0; - static constexpr int kCountDistinctValue = 1; - - struct NodeType { - int64_t value; - index_t value_count; - - index_t subtree_count[2]; - - index_t parent_id; - index_t child_id[2]; - }; - - std::vector nodes_; - index_t root_id_; - std::vector empty_slots_; - - index_t AllocateNode(); - - void DeallocateNode(index_t node_id); - - void SwitchParent(index_t old_parent_id, int old_child_side, index_t new_parent_id, - int new_child_side); - - // parent node | - 
// / \ / \ | - // node y --> x parent | - // / \ / \ | - // x mid mid y | - void Zig(index_t node_id, index_t parent_id, int parent_side); - - // grandparent node | - // / \ / \ | - // parent y x parent | - // / \ --> / \ | - // node mid1 mid0 grandparent | - // / \ / \ | - // x mid0 mid1 y | - void ZigZig(index_t node_id, index_t parent_id, index_t grandparent_id, - int parent_side); - - // grandparent node | - // / \ / \ | - // parent y parent grandparent | - // / \ --> /\ / \ | - // x node x mid0 mid1 y | - // / \ | - // mid0 mid1 | - void ZigZag(index_t node_id, index_t parent_id, index_t grandparent_id, int parent_side, - int grandparent_side); - - void Splay(index_t node_id); - - // Find the node with the given value if exists. - // Otherwise find the place in the tree where the new value would be - // inserted (its parent and parent's child index). - // - void Find(int64_t value, int counter_id, index_t* parent_id, int* parent_side, - index_t* node_id, index_t* count_less) const; - - void ValidateVisit(index_t node_id, index_t* count, index_t* count_distinct); - - void ValidateTree(); - - template - static int Print_StrLen(const T& value); - - struct PrintBox { - int x, y, w, h; - int root_x; - }; - - std::string Print_Label(index_t node_id) const; - - void Print_BoxWH(index_t node_id, std::map& boxes); - - void Print_BoxXY(int x, int y, index_t node_id, std::map& boxes); - - void Print_PutChar(std::vector>& canvas, int x, int y, char c); - - void Print_PutString(std::vector>& canvas, int x, int y, - std::string str); - - void Print_Node(index_t node_id, std::map& boxes, - std::vector>& canvas); - - void Print(); -}; - -} // namespace compute -} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/window_frame.h b/cpp/src/arrow/compute/exec/window_functions/window_frame.h index 4e507eb0f3e..6752c005787 100644 --- a/cpp/src/arrow/compute/exec/window_functions/window_frame.h +++ 
b/cpp/src/arrow/compute/exec/window_functions/window_frame.h @@ -18,34 +18,53 @@ #pragma once #include -#include +#include #include "arrow/compute/exec/util.h" namespace arrow { namespace compute { +// A collection of window frames for a sequence of rows in the window frame sort +// order. +// struct WindowFrames { - static constexpr int kMaxRangesInFrame = 3; + // Every frame is associated with a single row. + // + // This is the index of the first row (in the window frame sort order) for the + // first frame. + // + int64_t first_row_index; - int num_ranges_in_frame; + // Number of frames in this collection + // int64_t num_frames; + // Maximum number of ranges that make up each single frame. + // + static constexpr int kMaxRangesInFrame = 3; + + // Number of ranges that make up each single frame. + // Every frame will have exactly that many ranges, but any number of these + // ranges can be empty. + // + int num_ranges_per_frame; + // Range can be empty, in that case begin == end. Otherwise begin < end. // - // Ranges in a single frame must be disjoint but begin of next range can be - // equal to the end of the previous one. + // Ranges in a single frame must be disjoint but beginning of next range can + // be equal to the end of the previous one. + // + // Beginning of each next range must be greater or equal to the end of the + // previous range. // const int64_t* begins[kMaxRangesInFrame]; const int64_t* ends[kMaxRangesInFrame]; - // Row filter has bits set to 0 for rows that should not be included in the - // range. + // Check if a collection of frames represents sliding frames, + // that is for every boundary (left and right) of every range, the values + // across all frames are non-decreasing. // - // Null row filter means that all rows are qualified. 
- // - const uint8_t* row_filter; - - bool FramesProgressing() const { + bool IsSliding() const { for (int64_t i = 1; i < num_frames; ++i) { if (!(begins[i] >= begins[i - 1] && ends[i] >= ends[i - 1])) { return false; @@ -54,7 +73,13 @@ struct WindowFrames { return true; } - bool FramesExpanding() const { + // Check if a collection of frames represents cumulative frames, + // that is for every range, two adjacent frames either share the same + // beginning with end of the later one being no less than the end of the + // previous one, or the later one begins at or after the end of the previous + // one. + // + bool IsCummulative() const { for (int64_t i = 1; i < num_frames; ++i) { if (!((begins[i] >= ends[i - 1] || begins[i] == begins[i - 1]) && (ends[i] >= ends[i - 1]))) { @@ -63,81 +88,23 @@ struct WindowFrames { } return true; } -}; - -inline void GenerateTestFrames(Random64BitCopy& rand, int64_t num_rows, - std::vector& begins, std::vector& ends, - bool progressive, bool expansive) { - begins.resize(num_rows); - ends.resize(num_rows); - - if (!progressive && !expansive) { - constexpr int64_t max_frame_length = 100; - for (int64_t i = 0; i < num_rows; ++i) { - int64_t length = - rand.from_range(static_cast(0), std::min(num_rows, max_frame_length)); - int64_t begin = rand.from_range(static_cast(0), num_rows - length); - begins[i] = begin; - ends[i] = begin + length; - } - } else if (progressive && !expansive) { - int64_t dist = rand.from_range(static_cast(1), - std::max(static_cast(1), num_rows / 4)); - std::vector pos; - for (int64_t i = 0; i < num_rows + dist; ++i) { - pos.push_back(rand.from_range(static_cast(0), num_rows)); - } - std::sort(pos.begin(), pos.end()); - for (int64_t i = 0; i < num_rows; ++i) { - begins[i] = pos[i]; - ends[i] = pos[i + dist]; - } - } else { - int64_t num_partitions = - rand.from_range(static_cast(1), bit_util::CeilDiv(num_rows, 128LL)); - std::set partition_ends_set; - std::vector partition_ends; -
partition_ends_set.insert(num_rows); - partition_ends.push_back(num_rows); - for (int64_t i = 1; i < num_partitions; ++i) { - int64_t partition_end; - for (;;) { - partition_end = rand.from_range(static_cast(1), num_rows - 1); - if (partition_ends_set.find(partition_end) == partition_ends_set.end()) { - break; - } - } - partition_ends.push_back(partition_end); - partition_ends_set.insert(partition_end); - } - std::sort(partition_ends.begin(), partition_ends.end()); - for (int64_t ipartition = 0; ipartition < num_partitions; ++ipartition) { - int64_t partition_begin = ipartition == 0 ? 0LL : partition_ends[ipartition - 1]; - int64_t partition_end = partition_ends[ipartition]; - int64_t partition_length = partition_end - partition_begin; - int64_t begin = rand.from_range(0LL, 2LL); - if (begin >= partition_length) { - begin = partition_length - 1; - } - int64_t end = begin + rand.from_range(0LL, 2LL); - if (end > partition_length) { - end = partition_length; - } - begins[partition_begin + 0] = partition_begin + begin; - ends[partition_begin + 0] = partition_begin + end; - for (int64_t i = 1; i < partition_length; ++i) { - int64_t end_step = rand.from_range(0LL, 2LL); - end += end_step; - if (end > partition_length) { - end = partition_length; - } - begins[partition_begin + i] = partition_begin + begin; - ends[partition_begin + i] = partition_begin + end; - } + // Check if the row for which the frame is defined is included in any of the + // ranges defining that frame. 
+ // + bool IsRowInsideItsFrame(int64_t frame_index) const { + bool is_inside = false; + int64_t row_index = first_row_index + frame_index; + for (int64_t range_index = 0; range_index < num_ranges_per_frame; ++range_index) { + int64_t range_begin = begins[range_index][frame_index]; + int64_t range_end = ends[range_index][frame_index]; + is_inside = is_inside || (row_index >= range_begin && row_index < range_end); } + return is_inside; } -} +}; + +enum class WindowFrameSequenceType { CUMMULATIVE, SLIDING, GENERIC }; } // namespace compute -} // namespace arrow \ No newline at end of file +} // namespace arrow diff --git a/cpp/src/arrow/compute/exec/window_functions/window_rank.cc b/cpp/src/arrow/compute/exec/window_functions/window_rank.cc index cf30e06399a..a8601f5644f 100644 --- a/cpp/src/arrow/compute/exec/window_functions/window_rank.cc +++ b/cpp/src/arrow/compute/exec/window_functions/window_rank.cc @@ -20,352 +20,228 @@ namespace arrow { namespace compute { -class GroupPrevRankCalculator { - public: - GroupPrevRankCalculator(int64_t num_rows, const int64_t* group_ids_sorted, - const int64_t* permutation) { - int64_t num_bit_words = bit_util::CeilDiv(num_rows, 64); - - last_in_group_bitvec_.resize(num_bit_words); - memset(last_in_group_bitvec_.data(), 0, num_bit_words * sizeof(uint64_t)); - for (int64_t i = 0; i < num_rows; ++i) { - bool last_in_group = - (i == (num_rows - 1) || group_ids_sorted[i + 1] != group_ids_sorted[i]); - if (last_in_group) { - bit_util::SetBit(reinterpret_cast(last_in_group_bitvec_.data()), - permutation[i]); - } - } - last_in_group_popcounts_.resize(num_bit_words); - BitVectorNavigator::GenPopCounts(num_rows, last_in_group_bitvec_.data(), - last_in_group_popcounts_.data()); +void WindowRank_Global::Eval(RankType rank_type, const BitVectorNavigator& tie_begins, + int64_t batch_begin, int64_t batch_end, int64_t* results) { + int64_t num_rows = tie_begins.bit_count(); - num_rows_ = num_rows; - num_groups_ = 
BitVectorNavigator::PopCount(num_rows, last_in_group_bitvec_.data(), - last_in_group_popcounts_.data()); - - first_in_group_bitvec_.resize(num_bit_words); - memset(first_in_group_bitvec_.data(), 0, num_bit_words * sizeof(uint64_t)); - for (int64_t i = 0; i < num_rows; ++i) { - bool first_in_group = (i == 0 || group_ids_sorted[i - 1] != group_ids_sorted[i]); - if (first_in_group) { - bit_util::SetBit(reinterpret_cast(first_in_group_bitvec_.data()), - permutation[i]); - } - } - first_in_group_popcounts_.resize(num_bit_words); - BitVectorNavigator::GenPopCounts(num_rows, first_in_group_bitvec_.data(), - first_in_group_popcounts_.data()); + if (rank_type == RankType::ROW_NUMBER) { + std::iota(results, results + batch_end - batch_begin, 1LL + batch_begin); + return; } - // Prev is 0-based row number - // Returns 0-based row number - int64_t Rank(int64_t row_number, int64_t prev) { - if (bit_util::GetBit(reinterpret_cast(first_in_group_bitvec_.data()), - row_number)) { - return BitVectorNavigator::Rank(row_number, first_in_group_bitvec_.data(), - first_in_group_popcounts_.data()); - } else { - return num_groups_ + prev - - BitVectorNavigator::Rank(prev, last_in_group_bitvec_.data(), - last_in_group_popcounts_.data()); + if (rank_type == RankType::DENSE_RANK) { + for (int64_t i = batch_begin; i < batch_end; ++i) { + results[i - batch_begin] = tie_begins.RankNext(i); } + return; } - // Prev is 0-based row number - // Returns 0-based row number - int64_t RankEnd(int64_t prev_end) { - if (prev_end == num_rows_) { - return num_rows_; + + if (rank_type == RankType::RANK_TIES_LOW) { + int64_t rank = tie_begins.Select(tie_begins.RankNext(batch_begin) - 1); + ARROW_DCHECK( + tie_begins.RankNext(rank) == tie_begins.RankNext(batch_begin) && + (rank == 0 || tie_begins.RankNext(rank - 1) < tie_begins.RankNext(rank))); + rank += 1; + for (int64_t i = batch_begin; i < batch_end; ++i) { + rank = (tie_begins.GetBit(i) != 0) ? 
i + 1 : rank; + results[i - batch_begin] = rank; } - return num_groups_ + prev_end - - BitVectorNavigator::Rank(prev_end, last_in_group_bitvec_.data(), - last_in_group_popcounts_.data()); + return; } - private: - int64_t num_rows_; - int64_t num_groups_; - std::vector last_in_group_bitvec_; - std::vector last_in_group_popcounts_; - std::vector first_in_group_bitvec_; - std::vector first_in_group_popcounts_; -}; - -void WindowRank::Global(RankType rank_type, int64_t num_rows, const uint64_t* ties_bitvec, - const uint64_t* ties_popcounts, int64_t* output, - int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) { - switch (rank_type) { - case RankType::RANK_TIES_LOW: - case RankType::RANK_TIES_HIGH: - GlobalRank(rank_type == RankType::RANK_TIES_LOW, num_rows, ties_bitvec, - ties_popcounts, output, hardware_flags, temp_vector_stack); - break; - case RankType::DENSE_RANK: - GlobalDenseRank(num_rows, ties_bitvec, ties_popcounts, output); - break; - case RankType::ROW_NUMBER: - GlobalRowNumber(num_rows, output); - break; + if (rank_type == RankType::RANK_TIES_HIGH) { + int64_t rank_max = tie_begins.pop_count(); + int64_t rank_last = tie_begins.RankNext(batch_end - 1); + int64_t rank = (rank_last == rank_max) ? num_rows : tie_begins.Select(rank_last); + for (int64_t i = batch_end - 1; i >= batch_begin; --i) { + results[i - batch_begin] = rank; + rank = (tie_begins.GetBit(i) != 0) ? 
i : rank; + } + return; } } -void WindowRank::WithinFrame(RankType rank_type, int64_t num_rows, - const uint64_t* ties_bitvec, const uint64_t* ties_popcounts, - const int64_t* frame_begins, const int64_t* frame_ends, - int64_t* output, int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) { - switch (rank_type) { - case RankType::RANK_TIES_LOW: - case RankType::RANK_TIES_HIGH: - RankWithinFrame(rank_type == RankType::RANK_TIES_LOW, num_rows, ties_bitvec, - ties_popcounts, frame_begins, frame_ends, output, hardware_flags, - temp_vector_stack); - break; - case RankType::DENSE_RANK: - DenseRankWithinFrame(num_rows, ties_bitvec, ties_popcounts, frame_begins, - frame_ends, output); - break; - case RankType::ROW_NUMBER: - RowNumberWithinFrame(num_rows, frame_begins, frame_ends, output); - break; - } -} +void WindowRank_Framed1D::Eval(RankType rank_type, const BitVectorNavigator& tie_begins, + const WindowFrames& frames, int64_t* results) { + if (rank_type == RankType::RANK_TIES_LOW) { + // We will compute global rank into the same array as the one provided for + // the output (to avoid allocating another array). + // + // When computing rank for a given row we will only read the result + // computed for that row (no access to other rows) and update the same + // result array entry. 
+ // + int64_t* global_ranks = results; + WindowRank_Global::Eval(RankType::RANK_TIES_LOW, tie_begins, frames.first_row_index, + frames.first_row_index + frames.num_frames, global_ranks); -void WindowRank::OnSeparateAttribute(RankType rank_type, int64_t num_rows, - const int64_t* global_ranks_sorted, - const int64_t* permutation, bool progressive_frames, - const int64_t* frame_begins, - const int64_t* frame_ends, int64_t* output, - int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) { - switch (rank_type) { - case RankType::ROW_NUMBER: - case RankType::RANK_TIES_LOW: - case RankType::RANK_TIES_HIGH: - if (!progressive_frames) { - SeparateAttributeRank(rank_type == RankType::RANK_TIES_LOW, num_rows, - frame_begins, frame_ends, global_ranks_sorted, permutation, - output, hardware_flags, temp_vector_stack); - } else { - ProgressiveSeparateAttributeRank( - /*dense_rank=*/false, rank_type == RankType::RANK_TIES_LOW, num_rows, - frame_begins, frame_ends, global_ranks_sorted, permutation, output); - } - break; - case RankType::DENSE_RANK: - if (!progressive_frames) { - SeparateAttributeDenseRank(num_rows, frame_begins, frame_ends, - global_ranks_sorted, permutation, output, - hardware_flags, temp_vector_stack); - } else { - ProgressiveSeparateAttributeRank( - /*dense_rank=*/true, false, num_rows, frame_begins, frame_ends, - global_ranks_sorted, permutation, output); + // The rank is 1 + the number of rows with key strictly lower than the + // current row's key. + // + for (int64_t frame_index = 0; frame_index < frames.num_frames; ++frame_index) { + // If the frame does not contain current row it is still logically + // considered as included in the frame (e.g. empty frame will yield rank + // 1 since the set we look at consists of a single row, the current + // row). 
+ // + int64_t rank = 1; + for (int range_index = 0; range_index < frames.num_ranges_per_frame; + ++range_index) { + int64_t global_rank = global_ranks[frame_index]; + int64_t range_begin = frames.begins[range_index][frame_index]; + int64_t range_end = frames.ends[range_index][frame_index]; + + // The formula below takes care of the cases: + // a) current row outside of the range to the left, + // b) current row in the range and ties with the first row in the + // range, + // c) current row in the range and no tie with the first row in + // the range, + // d) current row outside of the range to the right and + // ties with the last row in the range. + // e) current row outside of the range to the right and does not tie + // with the last row in the range. + // f) empty frame range, + // + rank += std::max(static_cast(0), + std::min(global_rank, range_end + 1) - range_begin - 1); } - break; - } -} -void WindowRank::GlobalRank(bool ties_low, int64_t num_rows, const uint64_t* bitvec, - const uint64_t* popcounts, int64_t* output, - int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) { - // Range of indices for groups of ties in entire input - int64_t rank_begin = 0; - int64_t rank_end = BitVectorNavigator::PopCount(num_rows, bitvec, popcounts); - - // Break groups of ties into minibatches - int64_t minibatch_length_max = util::MiniBatch::kMiniBatchLength - 1; - auto selects_buf = util::TempVectorHolder( - temp_vector_stack, static_cast(minibatch_length_max + 1)); - auto selects = selects_buf.mutable_data(); - for (int64_t minibatch_begin = rank_begin; minibatch_begin < rank_end; - minibatch_begin += minibatch_length_max) { - int64_t minibatch_end = std::min(rank_end, minibatch_begin + minibatch_length_max); - - // Get first (and last) row number for each group of ties in minibatch - BitVectorNavigator::SelectsForRangeOfRanks(minibatch_begin, minibatch_end + 1, - num_rows, bitvec, popcounts, selects, - hardware_flags, temp_vector_stack); - if (ties_low) {
- for (int64_t ties_group = 0; ties_group < minibatch_end - minibatch_begin; - ++ties_group) { - for (int64_t i = selects[ties_group]; i < selects[ties_group + 1]; ++i) { - output[i] = selects[ties_group] + 1; - } - } - } else { - for (int64_t ties_group = 0; ties_group < minibatch_end - minibatch_begin; - ++ties_group) { - for (int64_t i = selects[ties_group]; i < selects[ties_group + 1]; ++i) { - output[i] = selects[ties_group + 1]; - } - } + results[frame_index] = rank; } } -} -void WindowRank::GlobalDenseRank(int64_t num_rows, const uint64_t* bitvec, - const uint64_t* popcounts, int64_t* output) { - for (int64_t i = 0; i < num_rows; ++i) { - output[i] = BitVectorNavigator::RankNext(i, bitvec, popcounts); - } -} + if (rank_type == RankType::RANK_TIES_HIGH) { + // To compute TIES_HIGH variant, we can reverse boundaries, + // global ranks by subtracting their values from num_rows + // and num_rows + 1 respectively, and we will get the same problem as + // TIES_LOW, the result of which we can convert back using the same + // method but this time using number of rows inside the frame instead of + // global number of rows. + // + // That is how the formula used below was derived. + // + // Note that the number of rows considered to be in the frame depends on + // whether the current row is inside or outside of the ranges defining its + // frame, because in the second case we need to add 1 to the total size of + // ranges.
+ // + int64_t* global_ranks = results; + WindowRank_Global::Eval(RankType::RANK_TIES_HIGH, tie_begins, frames.first_row_index, + frames.first_row_index + frames.num_frames, global_ranks); -void WindowRank::GlobalRowNumber(int64_t num_rows, int64_t* output) { - std::iota(output, output + num_rows, 1LL); -} + for (int64_t frame_index = 0; frame_index < frames.num_frames; ++frame_index) { + int64_t rank = 0; -void WindowRank::RankWithinFrame(bool ties_low, int64_t num_rows, const uint64_t* bitvec, - const uint64_t* popcounts, const int64_t* frame_begins, - const int64_t* frame_ends, int64_t* output, - int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) { - GlobalRank(ties_low, num_rows, bitvec, popcounts, output, hardware_flags, - temp_vector_stack); - for (int64_t i = 0; i < num_rows; ++i) { - // If the frame does not contain current row it is still logically - // considered as included in the frame (e.g. empty frame will yield rank - // 1 since the set we look at consists of a single row - current row). 
- - // The case of an empty frame - if (frame_begins[i] >= frame_ends[i]) { - output[i] = 1; - continue; - } + for (int range_index = 0; range_index < frames.num_ranges_per_frame; + ++range_index) { + int64_t global_rank = global_ranks[frame_index]; + int64_t range_begin = frames.begins[range_index][frame_index]; + int64_t range_end = frames.ends[range_index][frame_index]; - bool tie_with_first = - BitVectorNavigator::RankNext(i, bitvec, popcounts) == - BitVectorNavigator::RankNext(frame_begins[i], bitvec, popcounts); - bool tie_with_last = - BitVectorNavigator::RankNext(i, bitvec, popcounts) == - BitVectorNavigator::RankNext(frame_ends[i] - 1, bitvec, popcounts); - if (!tie_with_first) { - if (i < frame_begins[i]) { - output[i] = 1; - } else if (i >= frame_ends[i]) { - if (tie_with_last) { - if (ties_low) { - output[i] -= frame_begins[i]; - } else { - output[i] = frame_ends[i] - frame_begins[i] + 1; - } - } else { - output[i] = frame_ends[i] - frame_begins[i] + 1; - } - } else { - if (tie_with_last && !ties_low) { - output[i] = frame_ends[i] - frame_begins[i]; - } else { - output[i] -= frame_begins[i]; - } - } - } else { - if (tie_with_last) { - output[i] = ties_low ? 1 - : frame_ends[i] - frame_begins[i] + - ((i < frame_begins[i] || i >= frame_ends[i]) ? 1 : 0); - } else { - // Bit vector rank of current row is the same as the beginning of - // the frame but different than for the last row of the frame, which - // means that current row must appear before the last row of the - // frame. - // - ARROW_DCHECK(i < frame_ends[i]); - if (ties_low) { - output[i] = 1; - } else { - if (i < frame_begins[i]) { - output[i] -= frame_begins[i] - 1; - } else { - output[i] -= frame_begins[i]; - } - } + rank += std::min(range_end, std::max(global_rank, range_begin)) - range_begin; } + + rank += frames.IsRowInsideItsFrame(frame_index) ? 
0 : 1; + + results[frame_index] = rank; } } -} -void WindowRank::DenseRankWithinFrame(int64_t num_rows, const uint64_t* bitvec, - const uint64_t* popcounts, - const int64_t* frame_begins, - const int64_t* frame_ends, int64_t* output) { - for (int64_t i = 0; i < num_rows; ++i) { - if (frame_begins[i] >= frame_ends[i]) { - output[i] = 1; - continue; - } + if (rank_type == RankType::ROW_NUMBER) { + // Count rows inside the frame coming before the current row and add 1. + // + for (int64_t frame_index = 0; frame_index < frames.num_frames; ++frame_index) { + int64_t row_index = frames.first_row_index + frame_index; + int64_t rank = 1; + for (int range_index = 0; range_index < frames.num_ranges_per_frame; + ++range_index) { + int64_t range_begin = frames.begins[range_index][frame_index]; + int64_t range_end = frames.ends[range_index][frame_index]; + + rank += std::max(static_cast(0), + std::min(row_index, range_end) - range_begin); + } - if (i < frame_begins[i]) { - output[i] = 1; - continue; - } - if (i >= frame_ends[i]) { - bool tie_with_last = - BitVectorNavigator::RankNext(i, bitvec, popcounts) == - BitVectorNavigator::RankNext(frame_ends[i] - 1, bitvec, popcounts); - output[i] = BitVectorNavigator::RankNext(frame_ends[i] - 1, bitvec, popcounts) - - BitVectorNavigator::RankNext(frame_begins[i], bitvec, popcounts) + 1 + - (tie_with_last ? 0 : 1); - continue; + results[frame_index] = rank; } + } + + if (rank_type == RankType::DENSE_RANK) { + for (int64_t frame_index = 0; frame_index < frames.num_frames; ++frame_index) { + int64_t row_index = frames.first_row_index + frame_index; + int64_t rank = 1; + + // gdr = global dense rank + // + // Note that computing global dense rank corresponds to calling + // tie_begins.RankNext().
+ // + int64_t highest_gdr_seen = 0; + int64_t gdr = tie_begins.RankNext(row_index); + + for (int range_index = 0; range_index < frames.num_ranges_per_frame; + ++range_index) { + int64_t range_begin = frames.begins[range_index][frame_index]; + int64_t range_end = frames.ends[range_index][frame_index]; + + if (row_index < range_begin || range_end == range_begin) { + // Empty frame and frame starting after the current row - nothing to + // do. + // + } else { + // Count how many NEW peer groups before the current row's peer + // group are introduced by each range. + // + // Take into account when the last row of the previous range is in + // the same peer group as the first row of the next range. + // + int64_t gdr_first = tie_begins.RankNext(range_begin); + int64_t gdr_last = tie_begins.RankNext(range_end - 1); + int64_t new_peer_groups = std::max( + static_cast(0), std::min(gdr_last, gdr - 1) - + std::max(highest_gdr_seen + 1, gdr_first) + 1); + rank += new_peer_groups; + highest_gdr_seen = gdr_last; + } + } - output[i] = BitVectorNavigator::RankNext(i, bitvec, popcounts) - - BitVectorNavigator::RankNext(frame_begins[i], bitvec, popcounts) + 1; + results[frame_index] = rank; + } } } -void WindowRank::RowNumberWithinFrame(int64_t num_rows, const int64_t* frame_begins, - const int64_t* frame_ends, int64_t* output) { - for (int64_t i = 0; i < num_rows; ++i) { - if (frame_begins[i] >= frame_ends[i]) { - output[i] = 1; - continue; - } +Status WindowRank_Framed2D::Eval(RankType rank_type, + const BitVectorNavigator& rank_key_tie_begins, + const int64_t* order_by_rank_key, + const WindowFrames& frames, int64_t* results, + ThreadContext& thread_context) { + int64_t num_rows = rank_key_tie_begins.bit_count(); - if (i < frame_begins[i]) { - output[i] = 1; - continue; + if (rank_type == RankType::DENSE_RANK) { + if (frames.IsSliding()) { + return DenseRankWithSplayTree(); + } else { + return DenseRankWithRangeTree(); } + } - if (i >= frame_ends[i]) { - output[i] = 
frame_ends[i] - frame_begins[i] + 1; - continue; - } + ARROW_DCHECK(rank_type == RankType::ROW_NUMBER || + rank_type == RankType::RANK_TIES_LOW || + rank_type == RankType::RANK_TIES_HIGH); - output[i] = i - frame_begins[i] + 1; - } -} + ParallelForStream exec_plan; -void WindowRank::SeparateAttributeRank( - bool ties_low, - /* number of rows and number of frames */ - int64_t num_rows, const int64_t* begins, const int64_t* ends, - /* Sorted (in ascending order) ranks (with respect to ranking attribute) - for all rows */ - /* null can be passed if all ranks are distinct (in which case the - sorted array would just contain sequence of integers from 1 to num_rows). - Supplying null changes the semantics from rank to row number. */ - const int64_t* ranks_sorted, - /* Permutation of row numbers that results in sortedness on ranking - attribute */ - const int64_t* permutation, int64_t* output, int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack) { // Build merge tree // MergeTree merge_tree; - merge_tree.Build(num_rows, permutation, - /* number of top levels to skip */ 0, hardware_flags, - temp_vector_stack); - - // Ties low means outputting the number of rows in window frame with rank - // lower than current row plus 1. Initialize output counter accordingly. - // - for (int64_t i = 0; i < num_rows; ++i) { - bool outside_of_frame = i < begins[i] || i >= ends[i]; - output[i] = (ties_low || outside_of_frame) ? 1 : 0; - } + std::vector order_by_rank_key_copy(num_rows); + memcpy(order_by_rank_key_copy.data(), order_by_rank_key, + num_rows * sizeof(order_by_rank_key[0])); + RETURN_NOT_OK(merge_tree.Build(num_rows, + /*level_begin=*/bit_util::Log2(num_rows), + order_by_rank_key_copy.data(), exec_plan)); + RETURN_NOT_OK(exec_plan.RunOnSingleThread(thread_context)); // For each row compute the number of rows with the lower rank (lower or // equal in case of ties high). 
@@ -373,481 +249,206 @@ void WindowRank::SeparateAttributeRank( // This will be used as an upper bound on rank attribute when querying // merge tree. // - std::vector y_ends(num_rows); - int64_t first_in_group; - int64_t group_size; - for (int64_t i = 0; i < num_rows; ++i) { - if (i == 0 || ranks_sorted[i] != ranks_sorted[i - 1]) { - first_in_group = i; - group_size = 1; - for (int64_t j = i + 1; j < num_rows; ++j) { - if (ranks_sorted[j] == ranks_sorted[i]) { - ++group_size; - } - } - } - y_ends[permutation[i]] = ties_low ? first_in_group : first_in_group + group_size; - } - - BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, num_rows) - - merge_tree.MiniBatchRangeQuery(batch_length, begins + batch_begin, ends + batch_begin, - y_ends.data() + batch_begin, temp_vector_stack, - [&](int64_t iquery, int64_t node_begin, int64_t y_end) { - output[batch_begin + iquery] += y_end - node_begin; - }); - - END_MINI_BATCH_FOR -} - -void WindowRank::SeparateAttributeDenseRank( - int64_t num_rows, const int64_t* begins, const int64_t* ends, - /* The following two arrays must be the result of sorting rows on - (global dense rank, row number within window) pairs. Within a group of - peers with that same dense ranks rows must come in the order in which - they appeared in the window. This could be accomplished by a stable - sort of window on the dense rank value. - */ - const int64_t* global_dense_ranks_sorted, const int64_t* permutation, int64_t* output, - int64_t hardware_flags, util::TempVectorStack* temp_vector_stack) { - // Mapping to the coordinates (x, y, z) used by range tree is described - // below. - // - // Definitions are picked so that for each attribute every row has a - // distinct coordinate in the range [0, num_rows). 
- // - // The coordinates correspond to position in the sorted array for - // different sort orders: x - sorting on previous occurrence of the row - // with the same global dense rank y - window ordering z - sorting on - // global dense rank - // - // x: - // - for the first row in each dense rank group, dense rank minus 1, - // for other rows, number of rows preceding the previous row in the same - // dense rank group that are not the last in their dense rank groups. - // Alternative way of viewing this is the position in the array sorted on - // the following function: 1 plus position in the window sort order of the - // previous occurrence of the row with the same dense rank (current row's - // peer in the global dense rank group) or 0 if there is no previous - // occurrence. - // - exclusive upper bound for this attribute in the range query for ith - // frame is: number of dense rank groups plus the number of rows preceding - // begins[i] in the window order that are not the last in their respective - // dense rank groups. - // Alternative way of viewing this is std::lower_bound for begins[i] + 1 - // in the sorted array introduced in the description of an alternative - // view of x attribute. 
- // - // y: - // - row number in the window sort order (sort order used to - // compute frame boundaries) - // - range query uses begins[i], ends[i] as a range filter on this - // attribute for ith frame - // - // z: - // - position in the array sorted on global dense rank - // - range query uses number of rows with global dense rank less than that - // of the current row of ith frame - // - GroupPrevRankCalculator x_calc(num_rows, global_dense_ranks_sorted, permutation); - - RangeTree tree; + std::vector y_ends; + std::swap(order_by_rank_key_copy, y_ends); + auto temp_vector_stack = thread_context.temp_vector_stack; { - std::vector x_sorted_on_z(num_rows); - for (int64_t i = 0; i < num_rows; ++i) { - bool has_next = (i < num_rows - 1) && - (global_dense_ranks_sorted[i] == global_dense_ranks_sorted[i + 1]); - if (has_next) { - int64_t y = permutation[i + 1]; - int64_t z = i + 1; - int64_t prev = permutation[i]; - int64_t x = x_calc.Rank(y, prev); - x_sorted_on_z[z] = x; - } - bool has_prev = - (i > 0) && (global_dense_ranks_sorted[i] == global_dense_ranks_sorted[i - 1]); - if (!has_prev) { - int64_t y = permutation[i]; - int64_t z = i; - int64_t x = x_calc.Rank(y, -1); - x_sorted_on_z[z] = x; + TEMP_VECTOR(int64_t, global_ranks); + BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, num_rows) + WindowRank_Global::Eval(rank_type, rank_key_tie_begins, batch_begin, + batch_begin + batch_length, global_ranks); + if (rank_type == RankType::RANK_TIES_LOW || rank_type == RankType::ROW_NUMBER) { + for (int64_t i = 0; i < batch_length; ++i) { + --global_ranks[i]; } } - - const int64_t* y_sorted_on_z = permutation; - tree.Build(num_rows, x_sorted_on_z.data(), y_sorted_on_z, hardware_flags, - temp_vector_stack); - } - - // For each frame compute upper bound on z coordinate - // - std::vector z_ends(num_rows); - int64_t first_in_group; - for (int64_t i = 0; i < num_rows; ++i) { - bool is_first_in_group = - (i == 0) || global_dense_ranks_sorted[i - 1] != 
global_dense_ranks_sorted[i]; - if (is_first_in_group) { - first_in_group = i; + for (int64_t i = 0; i < batch_length; ++i) { + int64_t row_index = order_by_rank_key[batch_begin + i]; + y_ends[row_index] = global_ranks[i]; } - z_ends[permutation[i]] = first_in_group; - } - - TEMP_VECTOR(int64_t, x_ends); - - BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, num_rows) - - for (int64_t i = batch_begin; i < batch_begin + batch_length; ++i) { - x_ends[i - batch_begin] = x_calc.RankEnd(begins[i]); + END_MINI_BATCH_FOR } - tree.BoxCount(batch_length, x_ends, begins + batch_begin, ends + batch_begin, - z_ends.data() + batch_begin, output + batch_begin, hardware_flags, - temp_vector_stack); + BEGIN_MINI_BATCH_FOR(batch_begin, batch_length, frames.num_frames) - // Output is 1 plus the number of rows satisfying range query + // Execute box count queries one batch of frames at a time. // - for (int64_t i = batch_begin; i < batch_begin + batch_length; ++i) { - ++output[i]; - } - - END_MINI_BATCH_FOR -} - -void WindowRank::ProgressiveSeparateAttributeRank(bool dense_rank, bool ties_low, - int64_t num_rows, const int64_t* begins, - const int64_t* ends, - const int64_t* global_ranks_sorted, - const int64_t* permutation, - int64_t* output) { - if (dense_rank) { - ProgressiveSeparateAttributeRankImp(false, num_rows, begins, ends, - global_ranks_sorted, permutation, output); - } else { - ProgressiveSeparateAttributeRankImp(ties_low, num_rows, begins, ends, - global_ranks_sorted, permutation, output); - } -} - -template -void WindowRank::ProgressiveSeparateAttributeRankImp( - bool ties_low, int64_t num_rows, const int64_t* begins, const int64_t* ends, - const int64_t* global_ranks_sorted, const int64_t* permutation, int64_t* output) { - SplayTree tree; - int64_t begin = begins[0]; - int64_t end = begin; - - std::vector global_ranks(num_rows); - for (int64_t i = 0; i < num_rows; ++i) { - global_ranks[permutation[i]] = global_ranks_sorted[i]; + const int64_t* 
x_begins_batch[WindowFrames::kMaxRangesInFrame]; + const int64_t* x_ends_batch[WindowFrames::kMaxRangesInFrame]; + for (int64_t range_index = 0; range_index < frames.num_ranges_per_frame; + ++range_index) { + x_begins_batch[range_index] = frames.begins[range_index] + batch_begin; + x_ends_batch[range_index] = frames.ends[range_index] + batch_begin; } - - for (int64_t iframe = 0; iframe < num_rows; ++iframe) { - int64_t frame_begin = begins[iframe]; - int64_t frame_end = ends[iframe]; - ARROW_DCHECK(frame_begin >= begin && frame_end >= end); - - if (end <= frame_begin) { - tree.Clear(); - begin = end = frame_begin; - } - - while (begin < frame_begin) { - tree.Remove(global_ranks[begin++]); - ARROW_DCHECK(begin <= end); - } - while (frame_end > end) { - tree.Insert(global_ranks[end++]); + const int64_t* y_ends_batch = y_ends.data() + frames.first_row_index + batch_begin; + int64_t* results_batch = results + batch_begin; + merge_tree.BoxCountQuery(batch_length, frames.num_ranges_per_frame, x_begins_batch, + x_ends_batch, y_ends_batch, results_batch, thread_context); + + if (rank_type == RankType::RANK_TIES_LOW || rank_type == RankType::ROW_NUMBER) { + // For TIES_LOW and ROW_NUMBER we need to add 1 to the output of box count + // query to get the rank. + // + for (int64_t i = 0; i < batch_length; ++i) { + ++results_batch[i]; } - - if (T_DENSE_RANK) { - output[iframe] = tree.DenseRank(global_ranks[iframe]); - } else { - output[iframe] = tree.Rank(ties_low, global_ranks[iframe]); - if (!ties_low && (iframe < frame_begin || iframe >= frame_end)) { - ++output[iframe]; - } + } else { + // For TIES_HIGH we need to add 1 to the output only + // when the current row is outside of all the ranges defining its frame. + // + for (int64_t i = 0; i < batch_length; ++i) { + results_batch[i] += frames.IsRowInsideItsFrame(batch_begin + i) ? 
0 : 1; } } -} -void WindowRankBasic::Global(RankType rank_type, int64_t num_rows, const uint64_t* bitvec, - int64_t* output) { - int64_t current_group_id; - int64_t first_in_group; - int64_t num_in_group; - for (int64_t i = 0; i < num_rows; ++i) { - if (i == 0) { - current_group_id = 0; - first_in_group = i; - } else { - if (bit_util::GetBit(reinterpret_cast(bitvec), i)) { - ++current_group_id; - first_in_group = i; - } - } - if (first_in_group == i) { - num_in_group = 1; - while (first_in_group + num_in_group < num_rows && - !bit_util::GetBit(reinterpret_cast(bitvec), - first_in_group + num_in_group)) { - ++num_in_group; - } - } + END_MINI_BATCH_FOR - switch (rank_type) { - case RankType::ROW_NUMBER: - output[i] = i + 1; - break; - case RankType::RANK_TIES_LOW: - output[i] = first_in_group + 1; - break; - case RankType::RANK_TIES_HIGH: - output[i] = first_in_group + num_in_group; - break; - case RankType::DENSE_RANK: - output[i] = current_group_id + 1; - break; - } - } + return Status::OK(); } -void WindowRankBasic::WithinFrame(RankType rank_type, int64_t num_rows, - const uint64_t* bitvec, const int64_t* frame_begins, - const int64_t* frame_ends, int64_t* output) { +void WindowRank_Global_Ref::Eval(RankType rank_type, const BitVectorNavigator& tie_begins, + int64_t* results) { + int64_t num_rows = tie_begins.bit_count(); + const uint8_t* bit_vector = tie_begins.GetBytes(); + + std::vector peer_group_offsets; for (int64_t i = 0; i < num_rows; ++i) { - int64_t begin = frame_begins[i]; - int64_t end = frame_ends[i]; - if (end == begin) { - output[i] = 1; - continue; - } - int64_t num_words = bit_util::CeilDiv(end - begin + 1, 64); - std::vector frame_bitvec(num_words); - memset(frame_bitvec.data(), 0, num_words * sizeof(uint64_t)); - bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), 0); - - int64_t start_offset = 0; - if (i < begin) { - start_offset = 1; - for (int64_t j = i + 1; j <= begin; ++j) { - if (bit_util::GetBit(reinterpret_cast(bitvec), j)) { - 
bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), start_offset); - } - } - } - for (int64_t j = begin; j < end; ++j) { - if (bit_util::GetBit(reinterpret_cast(bitvec), j)) { - bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), - j - begin + start_offset); - } + if (bit_util::GetBit(bit_vector, i)) { + peer_group_offsets.push_back(i); } - if (i >= end) { - for (int64_t j = end; j <= i; ++j) { - if (bit_util::GetBit(reinterpret_cast(bitvec), j)) { - bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), - end - begin + start_offset); + } + int64_t num_peer_groups = static_cast(peer_group_offsets.size()); + peer_group_offsets.push_back(num_rows); + + for (int64_t peer_group = 0; peer_group < num_peer_groups; ++peer_group) { + int64_t peer_group_begin = peer_group_offsets[peer_group]; + int64_t peer_group_end = peer_group_offsets[peer_group + 1]; + for (int64_t i = peer_group_begin; i < peer_group_end; ++i) { + int64_t row_index = i; + int64_t rank; + switch (rank_type) { + case RankType::ROW_NUMBER: + rank = row_index + 1; + break; + case RankType::RANK_TIES_LOW: + rank = peer_group_begin + 1; + break; + case RankType::RANK_TIES_HIGH: + rank = peer_group_end; + break; + case RankType::DENSE_RANK: + rank = peer_group + 1; break; - } } - } - std::vector frame_output(end - begin + 1); - Global(rank_type, end - begin + ((i < begin || i >= end) ? 
1 : 0), - frame_bitvec.data(), frame_output.data()); - if (i < begin) { - output[i] = frame_output[0]; - } else { - output[i] = frame_output[std::min(end, i) - begin]; + results[row_index] = rank; } } } -void WindowRankBasic::SeparateAttribute(RankType rank_type, int64_t num_rows, - const int64_t* begins, const int64_t* ends, - const int64_t* global_ranks_sorted, - const int64_t* permutation, int64_t* output) { - if (num_rows == 0) { - return; - } - - std::vector inverse_permutation(num_rows); - for (int64_t i = 0; i < num_rows; ++i) { - inverse_permutation[permutation[i]] = i; - } - - for (int64_t i = 0; i < num_rows; ++i) { - int64_t begin = begins[i]; - int64_t end = ends[i]; - if (end == begin) { - output[i] = 1; - continue; - } - - // position in the array of sorted global ranks and row number - std::vector> rank_row; - for (int64_t j = begin; j < end; ++j) { - rank_row.push_back(std::make_pair(inverse_permutation[j], j)); - } - if (i >= end || i < begin) { - rank_row.push_back(std::make_pair(inverse_permutation[i], i)); - } +void WindowRank_Framed_Ref::Eval(RankType rank_type, + const BitVectorNavigator& rank_key_tie_begins, + const int64_t* order_by_rank_key, + const WindowFrames& frames, int64_t* results) { + int64_t num_rows = rank_key_tie_begins.bit_count(); - int64_t rank_row_length = static_cast(rank_row.size()); - std::sort(rank_row.begin(), rank_row.end()); + std::vector global_ranks_order_by_rank_key(num_rows); + WindowRank_Global_Ref::Eval(rank_type, rank_key_tie_begins, + global_ranks_order_by_rank_key.data()); - int64_t num_words = bit_util::CeilDiv(end - begin + 1, 64); - std::vector frame_bitvec(num_words); - memset(frame_bitvec.data(), 0, num_words * sizeof(uint64_t)); - for (int64_t j = 0; j < rank_row_length; ++j) { - if (j == 0 || global_ranks_sorted[rank_row[j - 1].first] != - global_ranks_sorted[rank_row[j].first]) { - bit_util::SetBit(reinterpret_cast(frame_bitvec.data()), j); - } + std::vector global_ranks(num_rows); + if 
(!order_by_rank_key) { + for (int64_t i = 0; i < num_rows; ++i) { + global_ranks[i] = global_ranks_order_by_rank_key[i]; } - std::vector frame_output(rank_row_length); - Global(rank_type, rank_row_length, frame_bitvec.data(), frame_output.data()); - for (int64_t j = 0; j < rank_row_length; ++j) { - if (rank_row[j].second == i) { - output[i] = frame_output[j]; - break; - } + } else { + for (int64_t i = 0; i < num_rows; ++i) { + global_ranks[order_by_rank_key[i]] = global_ranks_order_by_rank_key[i]; } } -} -void WindowRankTest::TestRank(RankType rank_type, bool separate_ranking_attribute, - bool use_frames, bool use_progressive_frames) { - Random64BitCopy rand; - MemoryPool* pool = default_memory_pool(); - util::TempVectorStack temp_vector_stack; - Status status = temp_vector_stack.Init(pool, 128 * util::MiniBatch::kMiniBatchLength); - ARROW_DCHECK(status.ok()); - int64_t hardware_flags = 0LL; - - constexpr int num_tests = 100; - const int num_tests_to_skip = 0; - for (int test = 0; test < num_tests; ++test) { - // Generate random values - // - constexpr int64_t max_rows = 1100; - int64_t num_rows = rand.from_range(static_cast(1LL), max_rows); - std::vector vals(num_rows); - constexpr int64_t max_val = 65535; - int tie_probability = rand.from_range(0, 256); - for (int64_t i = 0; i < num_rows; ++i) { - bool tie = rand.from_range(0, 255) < tie_probability; - if (tie && i > 0) { - vals[i] = vals[rand.from_range(static_cast(0LL), i - 1)]; - } else { - vals[i] = rand.from_range(static_cast(0LL), max_val); - } - } + for (int64_t frame_index = 0; frame_index < frames.num_frames; ++frame_index) { + int64_t current_row_index = frames.first_row_index + frame_index; - // Generate random frames + // Compute list of global ranks for all rows within the frame. 
// - std::vector begins; - std::vector ends; - GenerateTestFrames(rand, num_rows, begins, ends, - /*progressive=*/use_progressive_frames, - /*expansive=*/false); - - if (test < num_tests_to_skip) { - continue; - } - - // Sort values and output permutation and bit vector of ties + // Make sure to include the current row in the frame, even if it lies + // outside of the ranges defining its. // - int64_t num_bit_words = bit_util::CeilDiv(num_rows, 64); - std::vector ties_bitvec(num_bit_words); - std::vector ties_popcounts(num_bit_words); - std::vector permutation(num_rows); - { - std::vector> val_row_pairs(num_rows); - for (int64_t i = 0; i < num_rows; ++i) { - val_row_pairs[i] = std::make_pair(vals[i], i); - } - std::sort(val_row_pairs.begin(), val_row_pairs.end()); - for (int64_t i = 0; i < num_rows; ++i) { - permutation[i] = val_row_pairs[i].second; + std::vector global_ranks_within_frame; + bool current_row_included = false; + for (int64_t range_index = 0; range_index < frames.num_ranges_per_frame; + ++range_index) { + int64_t begin = frames.begins[range_index][frame_index]; + int64_t end = frames.ends[range_index][frame_index]; + if (!current_row_included && current_row_index < begin) { + global_ranks_within_frame.push_back(global_ranks[current_row_index]); + current_row_included = true; } - memset(ties_bitvec.data(), 0, num_bit_words * sizeof(uint64_t)); - for (int64_t i = 0; i < num_rows; ++i) { - bool is_first_in_group = - (i == 0 || val_row_pairs[i - 1].first != val_row_pairs[i].first); - if (is_first_in_group) { - bit_util::SetBit(reinterpret_cast(ties_bitvec.data()), i); + for (int64_t row_index = begin; row_index < end; ++row_index) { + if (row_index == current_row_index) { + current_row_included = true; } + global_ranks_within_frame.push_back(global_ranks[row_index]); } - BitVectorNavigator::GenPopCounts(num_rows, ties_bitvec.data(), - ties_popcounts.data()); } - - // Generate global ranks for the case when window frames use different - // row order - // 
- std::vector global_ranks(num_rows); - WindowRankBasic::Global(rank_type, num_rows, ties_bitvec.data(), global_ranks.data()); - - printf("num_rows %d ", static_cast(num_rows)); - - std::vector output[2]; - output[0].resize(num_rows); - output[1].resize(num_rows); - - int64_t num_repeats; -#ifndef NDEBUG - num_repeats = 1; -#else - num_repeats = std::max(1LL, 1024 * 1024LL / num_rows); -#endif - printf("num_repeats %d ", static_cast(num_repeats)); - - // int64_t start = __rdtsc(); - for (int repeat = 0; repeat < num_repeats; ++repeat) { - if (!use_frames) { - WindowRankBasic::Global(rank_type, num_rows, ties_bitvec.data(), - output[0].data()); - } else if (!separate_ranking_attribute) { - WindowRankBasic::WithinFrame(rank_type, num_rows, ties_bitvec.data(), - begins.data(), ends.data(), output[0].data()); - } else { - WindowRankBasic::SeparateAttribute(rank_type, num_rows, begins.data(), - ends.data(), global_ranks.data(), - permutation.data(), output[0].data()); + if (!current_row_included) { + global_ranks_within_frame.push_back(global_ranks[current_row_index]); + current_row_included = true; + } + + int64_t rank = 0; + for (int64_t frame_row_index = 0; + frame_row_index < static_cast(global_ranks_within_frame.size()); + ++frame_row_index) { + switch (rank_type) { + case RankType::ROW_NUMBER: + // Count the number of rows in the frame with lower global rank. + // + if (global_ranks_within_frame[frame_row_index] < + global_ranks[current_row_index]) { + ++rank; + } + break; + case RankType::RANK_TIES_LOW: + // Count the number of rows in the frame with lower global rank. + // + if (global_ranks_within_frame[frame_row_index] < + global_ranks[current_row_index]) { + ++rank; + } + break; + case RankType::RANK_TIES_HIGH: + // Count the number of rows in the frame with lower or equal global + // rank. 
+ // + if (global_ranks_within_frame[frame_row_index] <= + global_ranks[current_row_index]) { + ++rank; + } + break; + case RankType::DENSE_RANK: + // Count the number of rows in the frame with lower global rank that + // have global rank different than the previous row. + // + bool global_rank_changed = + (frame_row_index == 0) || (global_ranks_within_frame[frame_row_index] != + global_ranks_within_frame[frame_row_index - 1]); + if (global_ranks_within_frame[frame_row_index] < + global_ranks[current_row_index] && + global_rank_changed) { + ++rank; + } + break; } } - // int64_t end = __rdtsc(); - // printf("cpr basic %.1f ", - // static_cast(end - start) / static_cast(num_rows * - // num_repeats)); - // start = __rdtsc(); - for (int repeat = 0; repeat < num_repeats; ++repeat) { - if (!use_frames) { - WindowRank::Global(rank_type, num_rows, ties_bitvec.data(), ties_popcounts.data(), - output[1].data(), hardware_flags, &temp_vector_stack); - } else if (!separate_ranking_attribute) { - WindowRank::WithinFrame(rank_type, num_rows, ties_bitvec.data(), - ties_popcounts.data(), begins.data(), ends.data(), - output[1].data(), hardware_flags, &temp_vector_stack); - } else { - WindowRank::OnSeparateAttribute(rank_type, num_rows, global_ranks.data(), - permutation.data(), use_progressive_frames, - begins.data(), ends.data(), output[1].data(), - hardware_flags, &temp_vector_stack); - } + // For all rank types except for RANK_TIES_HIGH increment obtained rank + // value by 1. + // + if (rank_type != RankType::RANK_TIES_HIGH) { + ++rank; } - // end = __rdtsc(); - // printf("cpr normal %.1f ", - // static_cast(end - start) / static_cast(num_rows * - // num_repeats)); - bool ok = true; - for (int64_t i = 0; i < num_rows; ++i) { - if (output[0][i] != output[1][i]) { - ARROW_DCHECK(false); - ok = false; - } - } - printf("%s\n", ok ? 
"correct" : "wrong"); + results[frame_index] = rank; } } diff --git a/cpp/src/arrow/compute/exec/window_functions/window_rank.h b/cpp/src/arrow/compute/exec/window_functions/window_rank.h index e6da5e17b2d..a640dc3f516 100644 --- a/cpp/src/arrow/compute/exec/window_functions/window_rank.h +++ b/cpp/src/arrow/compute/exec/window_functions/window_rank.h @@ -22,19 +22,15 @@ #include "arrow/compute/exec/util.h" #include "arrow/compute/exec/window_functions/bit_vector_navigator.h" #include "arrow/compute/exec/window_functions/merge_tree.h" -#include "arrow/compute/exec/window_functions/range_tree.h" -#include "arrow/compute/exec/window_functions/splay_tree.h" #include "arrow/compute/exec/window_functions/window_frame.h" +// TODO: Add support for CUME_DIST and NTILE +// TODO: Add support for rank with limit +// TODO: Add support for rank with global filter + namespace arrow { namespace compute { -// TODO: Current row does not have to be inside its frame. -// Make sure that ranking functions behave well in that case. -// - -// TODO: Scale ranks to achieve CUME_DIST and NTILE values. 
- enum class RankType : int { ROW_NUMBER = 0, RANK_TIES_LOW = 1, @@ -42,107 +38,56 @@ enum class RankType : int { DENSE_RANK = 3 }; -class WindowRank { +class ARROW_EXPORT WindowRank_Global { public: - static void Global(RankType rank_type, int64_t num_rows, const uint64_t* ties_bitvec, - const uint64_t* ties_popcounts, int64_t* output, - int64_t hardware_flags, util::TempVectorStack* temp_vector_stack); + static void Eval(RankType rank_type, const BitVectorNavigator& tie_begins, + int64_t batch_begin, int64_t batch_end, int64_t* results); +}; - static void WithinFrame(RankType rank_type, int64_t num_rows, - const uint64_t* ties_bitvec, const uint64_t* ties_popcounts, - const int64_t* frame_begins, const int64_t* frame_ends, - int64_t* output, int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack); +class ARROW_EXPORT WindowRank_Framed1D { + public: + static void Eval(RankType rank_type, const BitVectorNavigator& tie_begins, + const WindowFrames& frames, int64_t* results); +}; - static void OnSeparateAttribute(RankType rank_type, int64_t num_rows, - const int64_t* global_ranks_sorted, - const int64_t* permutation, bool progressive_frames, - const int64_t* frame_begins, const int64_t* frame_ends, - int64_t* output, int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack); +class ARROW_EXPORT WindowRank_Framed2D { + public: + static Status Eval(RankType rank_type, const BitVectorNavigator& rank_key_tie_begins, + const int64_t* order_by_rank_key, const WindowFrames& frames, + int64_t* results, ThreadContext& thread_context); private: - static void GlobalRank(bool ties_low, int64_t num_rows, const uint64_t* bitvec, - const uint64_t* popcounts, int64_t* output, - int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack); - - static void GlobalDenseRank(int64_t num_rows, const uint64_t* bitvec, - const uint64_t* popcounts, int64_t* output); - - static void GlobalRowNumber(int64_t num_rows, int64_t* output); - - static void 
RankWithinFrame(bool ties_low, int64_t num_rows, const uint64_t* bitvec, - const uint64_t* popcounts, const int64_t* frame_begins, - const int64_t* frame_ends, int64_t* output, - int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack); - - static void DenseRankWithinFrame(int64_t num_rows, const uint64_t* bitvec, - const uint64_t* popcounts, const int64_t* frame_begins, - const int64_t* frame_ends, int64_t* output); - - static void RowNumberWithinFrame(int64_t num_rows, const int64_t* frame_begins, - const int64_t* frame_ends, int64_t* output); - - static void SeparateAttributeRank( - bool ties_low, - /* number of rows and number of frames */ - int64_t num_rows, const int64_t* begins, const int64_t* ends, - /* Sorted (in ascending order) ranks (with respect to ranking attribute) - for all rows */ - /* null can be passed if all ranks are distinct (in which case the - sorted array would just contain sequence of integers from 1 to num_rows). - Supplying null changes the semantics from rank to row number. */ - const int64_t* ranks_sorted, - /* Permutation of row numbers that results in sortedness on ranking - attribute */ - const int64_t* permutation, int64_t* output, int64_t hardware_flags, - util::TempVectorStack* temp_vector_stack); - - static void SeparateAttributeDenseRank( - int64_t num_rows, const int64_t* begins, const int64_t* ends, - /* The following two arrays must be the result of sorting rows on - (global dense rank, row number within window) pairs. Within a group of - peers with that same dense ranks rows must come in the order in which - they appeared in the window. This could be accomplished by a stable - sort of window on the dense rank value. 
- */ - const int64_t* global_dense_ranks_sorted, const int64_t* permutation, - int64_t* output, int64_t hardware_flags, util::TempVectorStack* temp_vector_stack); - - static void ProgressiveSeparateAttributeRank(bool dense_rank, bool ties_low, - int64_t num_rows, const int64_t* begins, - const int64_t* ends, - const int64_t* global_ranks_sorted, - const int64_t* permutation, - int64_t* output); - - template - static void ProgressiveSeparateAttributeRankImp( - bool ties_low, int64_t num_rows, const int64_t* begins, const int64_t* ends, - const int64_t* global_ranks_sorted, const int64_t* permutation, int64_t* output); + static Status DenseRankWithRangeTree() { + // TODO: Implement + ARROW_DCHECK(false); + return Status::OK(); + } + static Status DenseRankWithSplayTree() { + // TODO: Implement + ARROW_DCHECK(false); + return Status::OK(); + } }; -class WindowRankBasic { +// Reference implementations used for testing. +// +// May also be useful for understanding the expected behaviour of the actual +// implementations, which trade simplicity for efficiency. +// +class ARROW_EXPORT WindowRank_Global_Ref { public: - static void Global(RankType rank_type, int64_t num_rows, const uint64_t* bitvec, - int64_t* output); - - static void WithinFrame(RankType rank_type, int64_t num_rows, const uint64_t* bitvec, - const int64_t* frame_begins, const int64_t* frame_ends, - int64_t* output); - - static void SeparateAttribute(RankType rank_type, int64_t num_rows, - const int64_t* begins, const int64_t* ends, - const int64_t* global_ranks_sorted, - const int64_t* permutation, int64_t* output); + static void Eval(RankType rank_type, const BitVectorNavigator& tie_begins, + int64_t* results); }; -class WindowRankTest { +class ARROW_EXPORT WindowRank_Framed_Ref { public: - static void TestRank(RankType rank_type, bool separate_ranking_attribute, - bool use_frames, bool use_progressive_frames); + // For 1D variant use null pointer for the permutation of rows ordered by + // ranking key. 
That will assume that the permutation is an identity mapping. + // + static void Eval(RankType rank_type, const BitVectorNavigator& rank_key_tie_begins, + const int64_t* order_by_rank_key, const WindowFrames& frames, + int64_t* results); }; } // namespace compute diff --git a/cpp/src/arrow/compute/exec/window_functions/window_test.cc b/cpp/src/arrow/compute/exec/window_functions/window_test.cc new file mode 100644 index 00000000000..4398ca03a91 --- /dev/null +++ b/cpp/src/arrow/compute/exec/window_functions/window_test.cc @@ -0,0 +1,453 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include +#include "arrow/compute/exec/test_util.h" +#include "arrow/compute/exec/util.h" +#include "arrow/compute/exec/window_functions/window_rank.h" + +namespace arrow { +namespace compute { + +class WindowFramesRandom { + public: + static void Generate(Random64Bit& rand, WindowFrameSequenceType frame_sequence_type, + int64_t num_rows, int num_ranges_per_frame, + std::vector>* range_boundaries); + + static void GenerateSliding(Random64Bit& rand, int64_t num_rows, + int num_ranges_per_frame, + std::vector>* range_boundaries, + int64_t suggested_frame_span, int64_t suggested_gap_length); + + static void GenerateCummulative(Random64Bit& rand, int64_t num_rows, + int num_ranges_per_frame, + std::vector>* range_boundaries, + int num_restarts); + + static void GenerateGeneric(Random64Bit& rand, int64_t num_rows, + int num_ranges_per_frame, + std::vector>* range_boundaries, + int64_t max_frame_span, int64_t max_gap_length); + + private: + static void CutHoles(Random64Bit& rand, int64_t frame_span, int64_t num_holes, + int64_t sum_hole_size, std::vector& result_boundaries); +}; + +void WindowFramesRandom::Generate(Random64Bit& rand, + WindowFrameSequenceType frame_sequence_type, + int64_t num_rows, int num_ranges_per_frame, + std::vector>* range_boundaries) { + switch (frame_sequence_type) { + case WindowFrameSequenceType::CUMMULATIVE: + GenerateCummulative(rand, num_rows, num_ranges_per_frame, range_boundaries, + /*num_restarts=*/rand.from_range(0, 2)); + break; + case WindowFrameSequenceType::SLIDING: { + int64_t suggested_frame_span = + rand.from_range(static_cast(0), num_rows / 4); + int64_t suggested_gap_length = + rand.from_range(static_cast(0), suggested_frame_span / 2); + GenerateSliding(rand, num_rows, num_ranges_per_frame, range_boundaries, + suggested_frame_span, suggested_gap_length); + } break; + case WindowFrameSequenceType::GENERIC: { + int64_t max_frame_span = rand.from_range(static_cast(0), num_rows / 4); + int64_t max_gap_length = + 
rand.from_range(static_cast(0), max_frame_span / 2); + GenerateGeneric(rand, num_rows, num_ranges_per_frame, range_boundaries, + max_frame_span, max_gap_length); + } break; + } +} + +void WindowFramesRandom::GenerateSliding( + Random64Bit& rand, int64_t num_rows, int num_ranges_per_frame, + std::vector>* range_boundaries, int64_t suggested_frame_span, + int64_t suggested_gap_length) { + if (num_rows == 0) { + return; + } + + // Generate a sorted list of points that will serve as frame boundaries (for + // all ranges in all frames). + // + std::vector boundaries(num_rows + suggested_frame_span); + for (size_t i = 0; i < boundaries.size(); ++i) { + boundaries[i] = rand.from_range(static_cast(0), num_rows); + } + std::sort(boundaries.begin(), boundaries.end()); + + // Generate desired first frame (relative positions and sizes of ranges in + // it). + // + // This will serve as a basis for distances between range boundary points. + // + std::vector desired_boundaries; + CutHoles(rand, suggested_frame_span, num_ranges_per_frame - 1, suggested_gap_length, + desired_boundaries); + + // Assign boundary points from the sorted random vector at predetermined + // distances from each other to consecutive frames. 
+ // + range_boundaries->resize(num_ranges_per_frame * 2); + for (size_t i = 0; i < range_boundaries->size(); ++i) { + (*range_boundaries)[i].clear(); + } + for (int64_t i = 0; i < num_rows; ++i) { + for (int boundary_index = 0; boundary_index < 2 * num_ranges_per_frame; + ++boundary_index) { + (*range_boundaries)[boundary_index].push_back( + boundaries[i + desired_boundaries[boundary_index]]); + } + } +} + +void WindowFramesRandom::GenerateCummulative( + Random64Bit& rand, int64_t num_rows, int num_ranges_per_frame, + std::vector>* range_boundaries, int num_restarts) { + int num_boundaries_per_frame = 2 * num_ranges_per_frame; + range_boundaries->resize(num_boundaries_per_frame); + for (int64_t i = 0; i < num_boundaries_per_frame; ++i) { + (*range_boundaries)[i].clear(); + } + + // Divide rows into sections, each dedicated to a different range. + // + std::vector sections; + sections.push_back(0); + sections.push_back(num_rows); + for (int i = 0; i < num_ranges_per_frame - 1; ++i) { + sections.push_back(rand.from_range(static_cast(0), num_rows)); + } + std::sort(sections.begin(), sections.end()); + + // Process each section (range) separately. + // + for (int range_index = 0; range_index < num_ranges_per_frame; ++range_index) { + std::vector boundaries(num_rows + num_restarts + 1); + for (int64_t i = 0; i < num_rows + num_restarts + 1; ++i) { + boundaries[i] = rand.from_range(sections[range_index], sections[range_index + 1]); + } + std::sort(boundaries.begin(), boundaries.end()); + + // Mark restart points in the boundaries vector. 
+ // + std::vector boundary_is_restart_point(boundaries.size()); + for (int64_t i = 0; i < num_rows + num_restarts + 1; ++i) { + boundary_is_restart_point[i] = false; + } + boundary_is_restart_point[0] = true; + for (int i = 0; i < num_restarts; ++i) { + for (;;) { + int64_t pos = + rand.from_range(static_cast(0), num_rows + num_restarts - 1); + if (!boundary_is_restart_point[pos]) { + boundary_is_restart_point[pos] = true; + break; + } + } + } + + // Output results for next range. + // + int64_t current_begin = 0; + for (int64_t i = 0; i < num_rows + num_restarts + 1; ++i) { + if (boundary_is_restart_point[i]) { + current_begin = boundaries[i]; + } else { + (*range_boundaries)[2 * range_index + 0].push_back(current_begin); + (*range_boundaries)[2 * range_index + 1].push_back(boundaries[i]); + } + } + } +} + +void WindowFramesRandom::GenerateGeneric( + Random64Bit& rand, int64_t num_rows, int num_ranges_per_frame, + std::vector>* range_boundaries, int64_t max_frame_span, + int64_t max_gap_length) { + int num_boundaries_per_frame = 2 * num_ranges_per_frame; + range_boundaries->resize(num_boundaries_per_frame); + for (int64_t i = 0; i < num_boundaries_per_frame; ++i) { + (*range_boundaries)[i].clear(); + } + + for (int64_t row_index = 0; row_index < num_rows; ++row_index) { + int64_t frame_span = + rand.from_range(static_cast(0), std::min(num_rows, max_frame_span)); + int64_t gap_length = + rand.from_range(static_cast(0), std::min(frame_span, max_gap_length)); + int64_t frame_pos = rand.from_range(static_cast(0), num_rows - frame_span); + std::vector frame_boundaries; + CutHoles(rand, frame_span, num_ranges_per_frame - 1, gap_length, frame_boundaries); + for (size_t i = 0; i < frame_boundaries.size(); ++i) { + (*range_boundaries)[i].push_back(frame_boundaries[i] + frame_pos); + } + } +} + +void WindowFramesRandom::CutHoles(Random64Bit& rand, int64_t frame_span, + int64_t num_holes, int64_t sum_hole_size, + std::vector& result_boundaries) { + // Randomly pick size of 
each hole so that the sum is equal to the requested + // total. + // + ARROW_DCHECK(sum_hole_size <= frame_span); + std::vector cummulative_hole_sizes(num_holes + 1); + cummulative_hole_sizes[0] = 0; + for (int64_t i = 1; i < num_holes; ++i) { + cummulative_hole_sizes[i] = rand.from_range(static_cast(0), sum_hole_size); + } + cummulative_hole_sizes[num_holes] = sum_hole_size; + std::sort(cummulative_hole_sizes.begin(), cummulative_hole_sizes.end()); + + // Randomly pick starting position for each hole. + // + std::vector hole_pos(num_holes); + for (int64_t i = 0; i < num_holes; ++i) { + hole_pos[i] = rand.from_range(static_cast(0), frame_span - sum_hole_size); + } + std::sort(hole_pos.begin(), hole_pos.end()); + for (int64_t i = 0; i < num_holes; ++i) { + hole_pos[i] += cummulative_hole_sizes[i]; + } + + // Output result. + // + int64_t num_boundaries = (num_holes + 1) * 2; + result_boundaries.resize(num_boundaries); + result_boundaries[0] = 0; + result_boundaries[num_boundaries - 1] = frame_span; + for (int64_t i = 0; i < num_holes; ++i) { + result_boundaries[1 + 2 * i] = hole_pos[i]; + result_boundaries[2 + 2 * i] = + hole_pos[i] + cummulative_hole_sizes[i + 1] - cummulative_hole_sizes[i]; + } +} + +void TestWindowRankVariant(RankType rank_type, bool use_frames, bool use_2D) { + // TODO: Framed dense rank is not implemented yet: + // + ARROW_DCHECK(!(rank_type == RankType::DENSE_RANK && use_2D)); + + Random64Bit rand(/*seed=*/0); + + // Preparing thread execution context + // + MemoryPool* pool = default_memory_pool(); + util::TempVectorStack temp_vector_stack; + Status status = temp_vector_stack.Init(pool, 128 * util::MiniBatch::kMiniBatchLength); + ARROW_DCHECK(status.ok()); + ThreadContext thread_context; + thread_context.thread_index = 0; + thread_context.temp_vector_stack = &temp_vector_stack; + thread_context.hardware_flags = 0LL; + + // There will be: 24 small tests, 12 medium tests and 3 large tests. 
+ // + constexpr int num_tests = 24 + 12 + 3; + + // When debugging a failed test case, setting this value allows to skip + // execution of the first couple of test cases to go directly into the + // interesting one, while at the same time making sure that the generated + // random numbers are not affected. + // + const int num_tests_to_skip = 2; + + for (int test = 0; test < num_tests; ++test) { + // Generate random values. + // + // There will be: 24 small tests, 12 medium tests and 3 large tests. + // + int64_t max_rows = (test < 24) ? 100 : (test < 36) ? 256 : 2500; + int64_t num_rows = rand.from_range(static_cast(1), max_rows); + std::vector vals(num_rows); + int64_t max_val = num_rows; + int tie_probability = rand.from_range(0, 256); + for (int64_t i = 0; i < num_rows; ++i) { + bool tie = rand.from_range(0, 255) < tie_probability; + if (tie && i > 0) { + vals[i] = vals[rand.from_range(static_cast(0), i - 1)]; + } else { + vals[i] = rand.from_range(static_cast(0), max_val); + } + } + + // Generate random frames + // + int num_ranges_per_frame = rand.from_range(1, 3); + std::vector> range_boundaries; + int frame_sequence_type_index = rand.from_range(0, 2); + WindowFrameSequenceType frame_sequence_type = + (frame_sequence_type_index == 0) ? WindowFrameSequenceType::GENERIC + : (frame_sequence_type_index == 1) ? 
WindowFrameSequenceType::SLIDING + : WindowFrameSequenceType::CUMMULATIVE; + WindowFramesRandom::Generate(rand, frame_sequence_type, num_rows, + num_ranges_per_frame, &range_boundaries); + WindowFrames frames; + frames.first_row_index = 0; + frames.num_frames = num_rows; + frames.num_ranges_per_frame = num_ranges_per_frame; + for (int range_index = 0; range_index < num_ranges_per_frame; ++range_index) { + frames.begins[range_index] = range_boundaries[2 * range_index + 0].data(); + frames.ends[range_index] = range_boundaries[2 * range_index + 1].data(); + } + + // Random number generator is not used after this point in the test case, + // so we can skip the rest of the test case if we try to fast forward to a + // specific one. + // + if (test < num_tests_to_skip) { + continue; + } + + // Sort values and output permutation and bit vector of ties + // + BitVectorWithCounts tie_begins; + tie_begins.Resize(num_rows); + std::vector permutation(num_rows); + std::vector vals_sorted(num_rows); + { + std::vector> val_row_pairs(num_rows); + for (int64_t i = 0; i < num_rows; ++i) { + val_row_pairs[i] = std::make_pair(vals[i], i); + } + std::sort(val_row_pairs.begin(), val_row_pairs.end()); + for (int64_t i = 0; i < num_rows; ++i) { + permutation[i] = val_row_pairs[i].second; + vals_sorted[i] = val_row_pairs[i].first; + } + tie_begins.GetNavigator().MarkTieBegins(num_rows, vals_sorted.data()); + } + + ARROW_SCOPED_TRACE( + "num_rows = ", static_cast(num_rows), + "num_ranges_per_frame = ", num_ranges_per_frame, "window_frame_type = ", + use_frames + ? (frame_sequence_type == WindowFrameSequenceType::CUMMULATIVE ? "CUMMULATIVE" + : frame_sequence_type == WindowFrameSequenceType::SLIDING ? "SLIDING" + : "GENERIC") + : "NONE", + "rank_type = ", + rank_type == RankType::ROW_NUMBER ? "ROW_NUMBER" + : rank_type == RankType::RANK_TIES_LOW ? "RANK_TIES_LOW" + : rank_type == RankType::RANK_TIES_HIGH ? 
"RANK_TIES_HIGH" + : "DENSE_RANK", + "use_2D = ", use_2D); + + // At index 0 - reference results. + // At index 1 - actual results from implementation we wish to verify. + // + std::vector output[2]; + output[0].resize(num_rows); + output[1].resize(num_rows); + + // Execute reference implementation. + // + if (!use_frames) { + WindowRank_Global_Ref::Eval(rank_type, tie_begins.GetNavigator(), output[0].data()); + } else if (!use_2D) { + WindowRank_Framed_Ref::Eval(rank_type, tie_begins.GetNavigator(), nullptr, frames, + output[0].data()); + } else { + WindowRank_Framed_Ref::Eval(rank_type, tie_begins.GetNavigator(), + permutation.data(), frames, output[0].data()); + } + + // Execute actual implementation. + // + if (!use_frames) { + WindowRank_Global::Eval(rank_type, tie_begins.GetNavigator(), 0, num_rows, + output[1].data()); + } else if (!use_2D) { + WindowRank_Framed1D::Eval(rank_type, tie_begins.GetNavigator(), frames, + output[1].data()); + } else { + ASSERT_OK(WindowRank_Framed2D::Eval(rank_type, tie_begins.GetNavigator(), + permutation.data(), frames, output[1].data(), + thread_context)); + } + + bool ok = true; + for (int64_t i = 0; i < num_rows; ++i) { + if (output[0][i] != output[1][i]) { + ARROW_DCHECK(false); + ok = false; + } + } + ASSERT_TRUE(ok); + } +} + +TEST(WindowFunctions, Rank) { + // These flags are useful during debugging, to quickly restrict the set of + // executed tests to just the failing one. 
+ // + bool use_filter_framed = false; + bool use_filter_rank_type = false; + bool use_filter_2D = false; + + bool filter_framed_value = true; + RankType filter_rank_type_value = RankType::RANK_TIES_HIGH; + bool filter_2D_value = true; + + // Global rank + // + for (auto rank_type : {RankType::ROW_NUMBER, RankType::RANK_TIES_LOW, + RankType::RANK_TIES_HIGH, RankType::DENSE_RANK}) { + if (use_filter_2D && filter_2D_value) { + continue; + } + if (use_filter_framed && filter_framed_value) { + continue; + } + if (use_filter_rank_type && filter_rank_type_value != rank_type) { + continue; + } + TestWindowRankVariant(rank_type, + /*use_frames=*/false, + /*ignored*/ false); + } + + // Framed rank + // + for (auto use_2D : {false, true}) { + for (auto rank_type : {RankType::ROW_NUMBER, RankType::RANK_TIES_LOW, + RankType::RANK_TIES_HIGH, RankType::DENSE_RANK}) { + if (use_filter_framed && !filter_framed_value) { + continue; + } + if (use_filter_rank_type && filter_rank_type_value != rank_type) { + continue; + } + if (use_filter_2D && filter_2D_value != use_2D) { + continue; + } + if (rank_type == RankType::DENSE_RANK && use_2D) { + continue; + } + TestWindowRankVariant(rank_type, /*use_frames=*/true, use_2D); + } + } +} + +} // namespace compute +} // namespace arrow