From 63873742fd9dda1b91abc3a40b6e08f38a94a91e Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 30 Jun 2021 13:43:26 +0200 Subject: [PATCH 1/7] Rework slicing algorithm for merged AODs --- Framework/Core/include/Framework/Kernels.h | 70 +++++++++++++--------- 1 file changed, 43 insertions(+), 27 deletions(-) diff --git a/Framework/Core/include/Framework/Kernels.h b/Framework/Core/include/Framework/Kernels.h index 475fcb641d2e6..044db2ff0de38 100644 --- a/Framework/Core/include/Framework/Kernels.h +++ b/Framework/Core/include/Framework/Kernels.h @@ -30,12 +30,13 @@ namespace o2::framework /// @a offset the offset in the original table at which the corresponding /// slice was split. template -auto sliceByColumn(char const* key, - std::shared_ptr const& input, - T fullSize, - std::vector* slices, - std::vector* vals = nullptr, - std::vector* offsets = nullptr) +auto sliceByColumn( + char const* key, + std::shared_ptr const& input, + T fullSize, + std::vector* slices, + std::vector* vals = nullptr, + std::vector* offsets = nullptr) { arrow::Datum value_counts; auto options = arrow::compute::CountOptions::Defaults(); @@ -47,7 +48,7 @@ auto sliceByColumn(char const* key, auto counts = static_cast>(pair.field(1)->data()); // create slices and offsets - auto offset = 0; + uint64_t offset = 0; auto count = 0; auto size = values.length(); @@ -57,37 +58,52 @@ auto sliceByColumn(char const* key, } } - auto makeSlice = [&](T count) { - slices->emplace_back(arrow::Datum{input->Slice(offset, count)}); + auto makeSlice = [&](uint64_t offset_, T count) { + slices->emplace_back(arrow::Datum{input->Slice(offset_, count)}); if (offsets) { - offsets->emplace_back(offset); + offsets->emplace_back(offset_); } }; - auto current = 0; auto v = values.Value(0); - while (v - current >= 1) { - makeSlice(0); - ++current; - } - - for (auto r = 0; r < size - 1; ++r) { - count = counts.Value(r); - makeSlice(count); - offset += count; - auto nextValue = values.Value(r + 1); - auto value = values.Value(r); - while (nextValue - value > 1) { - makeSlice(0); - ++value; + auto vprev = v; + auto vnext = v; + for (auto i = 0; i < size - 1; ++i) { + vprev = v; + v = values.Value(i); + count = counts.Value(i); + if (v >= 0) { + if (vprev < 0 || (v - vprev) == 1) { + makeSlice(offset, count); + offset += count; + continue; + } else { + while (v - vprev > 1) { + makeSlice(offset, 0); + ++vprev; + } + makeSlice(offset, count); + offset += count; + continue; + } + } else { + vnext = values.Value(i + 1); + while (vnext - vprev > 1) { + makeSlice(offset, 0); + ++vprev; + } + makeSlice(offset, count); + offset += count; + continue; } } - makeSlice(counts.Value(size - 1)); + + makeSlice(offset, counts.Value(size - 1)); offset += counts.Value(size - 1); if (values.Value(size - 1) < fullSize - 1) { for (auto v = values.Value(size - 1) + 1; v < fullSize; ++v) { - makeSlice(0); + makeSlice(offset, 0); } } From f34a100727347e77f815f20458e7df74e5901edb Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 30 Jun 2021 15:02:10 +0200 Subject: [PATCH 2/7] decrease level of nesting --- Framework/Core/include/Framework/Kernels.h | 29 +++++++++++----------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/Framework/Core/include/Framework/Kernels.h b/Framework/Core/include/Framework/Kernels.h index 044db2ff0de38..c9ad4c46d6be6 100644 --- a/Framework/Core/include/Framework/Kernels.h +++ b/Framework/Core/include/Framework/Kernels.h @@ -72,21 +72,7 @@ auto sliceByColumn( vprev = v; v = values.Value(i); count = counts.Value(i); - if (v >= 0) { - if (vprev < 0 || (v - vprev) == 1) { - makeSlice(offset, count); - offset += count; - continue; - } else { - while (v - vprev > 1) { - makeSlice(offset, 0); - ++vprev; - } - makeSlice(offset, count); - offset += count; - continue; - } - } else { + if (v < 0) { vnext = values.Value(i + 1); while (vnext - vprev > 1) { makeSlice(offset, 0); @@ -96,6 +82,19 @@ auto sliceByColumn( offset += count; continue; } + if (vprev < 0 || (v - vprev) == 1) { + makeSlice(offset, count); + offset += count; + continue; + } else { + while (v - vprev > 1) { + makeSlice(offset, 0); + ++vprev; + } + makeSlice(offset, count); + offset += count; + continue; + } } makeSlice(offset, counts.Value(size - 1)); From 6aa99f46f47bd49a86f8bcb3401ee47de1ad4795 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 30 Jun 2021 15:03:55 +0200 Subject: [PATCH 3/7] fixup! decrease level of nesting --- Framework/Core/include/Framework/Kernels.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Framework/Core/include/Framework/Kernels.h b/Framework/Core/include/Framework/Kernels.h index c9ad4c46d6be6..2489bcb0817ba 100644 --- a/Framework/Core/include/Framework/Kernels.h +++ b/Framework/Core/include/Framework/Kernels.h @@ -86,15 +86,14 @@ auto sliceByColumn( makeSlice(offset, count); offset += count; continue; - } else { - while (v - vprev > 1) { - makeSlice(offset, 0); - ++vprev; - } - makeSlice(offset, count); - offset += count; - continue; } + while (v - vprev > 1) { + makeSlice(offset, 0); + ++vprev; + } + makeSlice(offset, count); + offset += count; + continue; } makeSlice(offset, counts.Value(size - 1)); From 9519493f4c36b08ac34026cd5de887ea54ec9096 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 30 Jun 2021 15:04:52 +0200 Subject: [PATCH 4/7] fixup! fixup! decrease level of nesting --- Framework/Core/include/Framework/Kernels.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Framework/Core/include/Framework/Kernels.h b/Framework/Core/include/Framework/Kernels.h index 2489bcb0817ba..fd8406d6af8a6 100644 --- a/Framework/Core/include/Framework/Kernels.h +++ b/Framework/Core/include/Framework/Kernels.h @@ -93,7 +93,6 @@ auto sliceByColumn( } makeSlice(offset, count); offset += count; - continue; } makeSlice(offset, counts.Value(size - 1)); From dac185fc9a5b7adaa7435d5ded32ecaeee42a000 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Wed, 30 Jun 2021 15:19:31 +0200 Subject: [PATCH 5/7] simplify algorithm --- Framework/Core/include/Framework/Kernels.h | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/Framework/Core/include/Framework/Kernels.h b/Framework/Core/include/Framework/Kernels.h index fd8406d6af8a6..bd714098b7692 100644 --- a/Framework/Core/include/Framework/Kernels.h +++ b/Framework/Core/include/Framework/Kernels.h @@ -68,28 +68,20 @@ auto sliceByColumn( auto v = values.Value(0); auto vprev = v; auto vnext = v; + auto nzeros = 0; for (auto i = 0; i < size - 1; ++i) { + nzeros = 0; vprev = v; v = values.Value(i); count = counts.Value(i); if (v < 0) { vnext = values.Value(i + 1); - while (vnext - vprev > 1) { - makeSlice(offset, 0); - ++vprev; - } - makeSlice(offset, count); - offset += count; - continue; + nzeros = vnext - vprev - 1; + } else if (vprev >= 0 && (v - vprev) != 1) { + nzeros = v - vprev - 1; } - if (vprev < 0 || (v - vprev) == 1) { - makeSlice(offset, count); - offset += count; - continue; - } - while (v - vprev > 1) { + for (auto z = 0; z < nzeros; ++z) { makeSlice(offset, 0); - ++vprev; } makeSlice(offset, count); offset += count; From fa1c1f9977a01164cd2a0e0661020535af485bf7 Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Fri, 2 Jul 2021 13:42:27 +0200 Subject: [PATCH 6/7] (optionally) return unassigned groups in a different vector greatly simplifying the grouping --- .../Core/include/Framework/AnalysisTask.h | 35 +------------ Framework/Core/include/Framework/Kernels.h | 50 ++++++++++--------- Framework/Core/test/test_Kernels.cxx | 2 +- 3 files changed, 29 insertions(+), 58 deletions(-) diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index d9621347cdaba..e9b4d9d17ec7d 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -262,23 +262,6 @@ struct AnalysisDataProcessorBuilder { } } - template - auto changeShifts() - { - constexpr auto index = framework::has_type_at_v(associated_pack_t{}); - if (unassignedGroups[index] > 0) { - uint64_t pos; - if constexpr (soa::is_soa_filtered_t>::value) { - pos = (*groupSelection)[position]; - } else { - pos = position; - } - if ((idValues[index])[pos] < 0) { - ++shifts[index]; - } - } - } - GroupSlicerIterator(G& gt, std::tuple& at) : mAt{&at}, mGroupingElement{gt.begin()}, @@ -299,14 +282,12 @@ struct AnalysisDataProcessorBuilder { x.asArrowTable(), static_cast(gt.tableSize()), &groups[index], - &idValues[index], &offsets[index]); if (result.ok() == false) { throw runtime_error("Cannot split collection"); } - unassignedGroups[index] = std::count_if(idValues[index].begin(), idValues[index].end(), [](auto&& x) { return x < 0; }); - if ((groups[index].size() - unassignedGroups[index]) > gt.tableSize()) { - throw runtime_error_f("Splitting collection resulted in a larger group number (%d, %d of them unassigned) than there is rows in the grouping table (%d).", groups[index].size(), unassignedGroups[index], gt.tableSize()); + if (groups[index].size() > gt.tableSize()) { + throw runtime_error_f("Splitting collection resulted in a larger group number (%d) than there is rows in the grouping table (%d).", groups[index].size(), gt.tableSize()); }; } }; @@ -331,8 +312,6 @@ struct AnalysisDataProcessorBuilder { (extractor(x), ...); }, at); - - (changeShifts(), ...); } template @@ -410,12 +389,6 @@ struct AnalysisDataProcessorBuilder { } else { pos = position; } - if (unassignedGroups[index] > 0) { - if ((idValues[index])[pos + shifts[index]] < 0) { - ++shifts[index]; - } - pos += shifts[index]; - } if constexpr (soa::is_soa_filtered_t>::value) { auto groupedElementsTable = arrow::util::get>(((groups[index])[pos]).value); @@ -446,14 +419,10 @@ struct AnalysisDataProcessorBuilder { typename grouping_t::iterator mGroupingElement; uint64_t position = 0; soa::SelectionVector const* groupSelection = nullptr; - std::array, sizeof...(A)> groups; - std::array, sizeof...(A)> idValues; std::array, sizeof...(A)> offsets; std::array selections; std::array starts; - std::array unassignedGroups{0}; - std::array shifts{0}; }; GroupSlicerIterator& begin() diff --git a/Framework/Core/include/Framework/Kernels.h b/Framework/Core/include/Framework/Kernels.h index bd714098b7692..2a3b989241724 100644 --- a/Framework/Core/include/Framework/Kernels.h +++ b/Framework/Core/include/Framework/Kernels.h @@ -35,8 +35,9 @@ auto sliceByColumn( std::shared_ptr const& input, T fullSize, std::vector* slices, - std::vector* vals = nullptr, - std::vector* offsets = nullptr) + std::vector* offsets = nullptr, + std::vector* unassignedSlices = nullptr, + std::vector* unassignedOffsets = nullptr) { arrow::Datum value_counts; auto options = arrow::compute::CountOptions::Defaults(); @@ -49,37 +50,42 @@ auto sliceByColumn( // create slices and offsets uint64_t offset = 0; + uint64_t unassignedOffset = 0; auto count = 0; - auto size = values.length(); - if (vals != nullptr) { - for (auto i = 0; i < size; ++i) { - vals->push_back(values.Value(i)); - } - } - auto makeSlice = [&](uint64_t offset_, T count) { - slices->emplace_back(arrow::Datum{input->Slice(offset_, count)}); + auto makeSlice = [&](uint64_t offset_, T count_) { + slices->emplace_back(arrow::Datum{input->Slice(offset_, count_)}); if (offsets) { offsets->emplace_back(offset_); } }; - auto v = values.Value(0); + auto makeUnassignedSlice = [&](uint64_t offset_, T count_) { + if (unassignedSlices) { + unassignedSlices->emplace_back(arrow::Datum{input->Slice(offset_, count_)}); + } + if (unassignedOffsets) { + unassignedOffsets->emplace_back(offset_); + } + }; + + auto v = 0; auto vprev = v; - auto vnext = v; auto nzeros = 0; - for (auto i = 0; i < size - 1; ++i) { - nzeros = 0; - vprev = v; - v = values.Value(i); + + for (auto i = 0; i < size; ++i) { count = counts.Value(i); + if (v >= 0) { + vprev = v; + } + v = values.Value(i); if (v < 0) { - vnext = values.Value(i + 1); - nzeros = vnext - vprev - 1; - } else if (vprev >= 0 && (v - vprev) != 1) { - nzeros = v - vprev - 1; + makeUnassignedSlice(offset, count); + offset += count; + continue; } + nzeros = v - vprev - ((i == 0) ? 0 : 1); for (auto z = 0; z < nzeros; ++z) { makeSlice(offset, 0); } @@ -87,9 +93,6 @@ auto sliceByColumn( offset += count; } - makeSlice(offset, counts.Value(size - 1)); - offset += counts.Value(size - 1); - if (values.Value(size - 1) < fullSize - 1) { for (auto v = values.Value(size - 1) + 1; v < fullSize; ++v) { makeSlice(offset, 0); @@ -98,7 +101,6 @@ auto sliceByColumn( return arrow::Status::OK(); } - } // namespace o2::framework #endif // O2_FRAMEWORK_KERNELS_H_ diff --git a/Framework/Core/test/test_Kernels.cxx b/Framework/Core/test/test_Kernels.cxx index 68cee1136534c..75bef63c6ad8e 100644 --- a/Framework/Core/test/test_Kernels.cxx +++ b/Framework/Core/test/test_Kernels.cxx @@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE(TestSlicingFramework) std::vector offsets; std::vector slices; - auto status = sliceByColumn("x", table, 12, &slices, nullptr, &offsets); + auto status = sliceByColumn("x", table, 12, &slices, &offsets); BOOST_REQUIRE(status.ok()); BOOST_REQUIRE_EQUAL(slices.size(), 12); std::array sizes{0, 4, 1, 0, 1, 2, 0, 0, 0, 0, 0, 0}; From d3cfe71762b3c54ae990122dd736a8f02cc352ce Mon Sep 17 00:00:00 2001 From: Anton Alkin Date: Sat, 3 Jul 2021 18:30:32 +0200 Subject: [PATCH 7/7] fix GroupSlicer test --- Framework/Core/test/test_GroupSlicer.cxx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 12855c752b529..f3b753c7eb0ec 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -367,7 +367,7 @@ BOOST_AUTO_TEST_CASE(ArrowDirectSlicing) std::vector slices; std::vector offsts; - auto status = sliceByColumn("fID", b_e.asArrowTable(), 20, &slices, nullptr, &offsts); + auto status = sliceByColumn("fID", b_e.asArrowTable(), 20, &slices, &offsts); for (auto i = 0u; i < 5; ++i) { auto tbl = arrow::util::get>(slices[i].value); auto ca = tbl->GetColumnByName("fArr");