diff --git a/Framework/Core/include/Framework/AnalysisTask.h b/Framework/Core/include/Framework/AnalysisTask.h index d9621347cdaba..e9b4d9d17ec7d 100644 --- a/Framework/Core/include/Framework/AnalysisTask.h +++ b/Framework/Core/include/Framework/AnalysisTask.h @@ -262,23 +262,6 @@ struct AnalysisDataProcessorBuilder { } } - template - auto changeShifts() - { - constexpr auto index = framework::has_type_at_v(associated_pack_t{}); - if (unassignedGroups[index] > 0) { - uint64_t pos; - if constexpr (soa::is_soa_filtered_t>::value) { - pos = (*groupSelection)[position]; - } else { - pos = position; - } - if ((idValues[index])[pos] < 0) { - ++shifts[index]; - } - } - } - GroupSlicerIterator(G& gt, std::tuple& at) : mAt{&at}, mGroupingElement{gt.begin()}, @@ -299,14 +282,12 @@ struct AnalysisDataProcessorBuilder { x.asArrowTable(), static_cast(gt.tableSize()), &groups[index], - &idValues[index], &offsets[index]); if (result.ok() == false) { throw runtime_error("Cannot split collection"); } - unassignedGroups[index] = std::count_if(idValues[index].begin(), idValues[index].end(), [](auto&& x) { return x < 0; }); - if ((groups[index].size() - unassignedGroups[index]) > gt.tableSize()) { - throw runtime_error_f("Splitting collection resulted in a larger group number (%d, %d of them unassigned) than there is rows in the grouping table (%d).", groups[index].size(), unassignedGroups[index], gt.tableSize()); + if (groups[index].size() > gt.tableSize()) { + throw runtime_error_f("Splitting collection resulted in a larger group number (%d) than there is rows in the grouping table (%d).", groups[index].size(), gt.tableSize()); }; } }; @@ -331,8 +312,6 @@ struct AnalysisDataProcessorBuilder { (extractor(x), ...); }, at); - - (changeShifts(), ...); } template @@ -410,12 +389,6 @@ struct AnalysisDataProcessorBuilder { } else { pos = position; } - if (unassignedGroups[index] > 0) { - if ((idValues[index])[pos + shifts[index]] < 0) { - ++shifts[index]; - } - pos += shifts[index]; - } if constexpr (soa::is_soa_filtered_t>::value) { auto groupedElementsTable = arrow::util::get>(((groups[index])[pos]).value); @@ -446,14 +419,10 @@ struct AnalysisDataProcessorBuilder { typename grouping_t::iterator mGroupingElement; uint64_t position = 0; soa::SelectionVector const* groupSelection = nullptr; - std::array, sizeof...(A)> groups; - std::array, sizeof...(A)> idValues; std::array, sizeof...(A)> offsets; std::array selections; std::array starts; - std::array unassignedGroups{0}; - std::array shifts{0}; }; GroupSlicerIterator& begin() diff --git a/Framework/Core/include/Framework/Kernels.h b/Framework/Core/include/Framework/Kernels.h index 475fcb641d2e6..2a3b989241724 100644 --- a/Framework/Core/include/Framework/Kernels.h +++ b/Framework/Core/include/Framework/Kernels.h @@ -30,12 +30,14 @@ namespace o2::framework /// @a offset the offset in the original table at which the corresponding /// slice was split. template -auto sliceByColumn(char const* key, - std::shared_ptr const& input, - T fullSize, - std::vector* slices, - std::vector* vals = nullptr, - std::vector* offsets = nullptr) +auto sliceByColumn( + char const* key, + std::shared_ptr const& input, + T fullSize, + std::vector* slices, + std::vector* offsets = nullptr, + std::vector* unassignedSlices = nullptr, + std::vector* unassignedOffsets = nullptr) { arrow::Datum value_counts; auto options = arrow::compute::CountOptions::Defaults(); @@ -47,53 +49,58 @@ auto sliceByColumn(char const* key, auto counts = static_cast>(pair.field(1)->data()); // create slices and offsets - auto offset = 0; + uint64_t offset = 0; + uint64_t unassignedOffset = 0; auto count = 0; - auto size = values.length(); - if (vals != nullptr) { - for (auto i = 0; i < size; ++i) { - vals->push_back(values.Value(i)); - } - } - auto makeSlice = [&](T count) { - slices->emplace_back(arrow::Datum{input->Slice(offset, count)}); + auto makeSlice = [&](uint64_t offset_, T count_) { + slices->emplace_back(arrow::Datum{input->Slice(offset_, count_)}); if (offsets) { - offsets->emplace_back(offset); + offsets->emplace_back(offset_); } }; - auto current = 0; - auto v = values.Value(0); - while (v - current >= 1) { - makeSlice(0); - ++current; - } + auto makeUnassignedSlice = [&](uint64_t offset_, T count_) { + if (unassignedSlices) { + unassignedSlices->emplace_back(arrow::Datum{input->Slice(offset_, count_)}); + } + if (unassignedOffsets) { + unassignedOffsets->emplace_back(offset_); + } + }; - for (auto r = 0; r < size - 1; ++r) { - count = counts.Value(r); - makeSlice(count); - offset += count; - auto nextValue = values.Value(r + 1); - auto value = values.Value(r); - while (nextValue - value > 1) { - makeSlice(0); - ++value; + auto v = 0; + auto vprev = v; + auto nzeros = 0; + + for (auto i = 0; i < size; ++i) { + count = counts.Value(i); + if (v >= 0) { + vprev = v; + } + v = values.Value(i); + if (v < 0) { + makeUnassignedSlice(offset, count); + offset += count; + continue; } + nzeros = v - vprev - ((i == 0) ? 0 : 1); + for (auto z = 0; z < nzeros; ++z) { + makeSlice(offset, 0); + } + makeSlice(offset, count); + offset += count; } - makeSlice(counts.Value(size - 1)); - offset += counts.Value(size - 1); if (values.Value(size - 1) < fullSize - 1) { for (auto v = values.Value(size - 1) + 1; v < fullSize; ++v) { - makeSlice(0); + makeSlice(offset, 0); } } return arrow::Status::OK(); } - } // namespace o2::framework #endif // O2_FRAMEWORK_KERNELS_H_ diff --git a/Framework/Core/test/test_GroupSlicer.cxx b/Framework/Core/test/test_GroupSlicer.cxx index 12855c752b529..f3b753c7eb0ec 100644 --- a/Framework/Core/test/test_GroupSlicer.cxx +++ b/Framework/Core/test/test_GroupSlicer.cxx @@ -367,7 +367,7 @@ BOOST_AUTO_TEST_CASE(ArrowDirectSlicing) std::vector slices; std::vector offsts; - auto status = sliceByColumn("fID", b_e.asArrowTable(), 20, &slices, nullptr, &offsts); + auto status = sliceByColumn("fID", b_e.asArrowTable(), 20, &slices, &offsts); for (auto i = 0u; i < 5; ++i) { auto tbl = arrow::util::get>(slices[i].value); auto ca = tbl->GetColumnByName("fArr"); diff --git a/Framework/Core/test/test_Kernels.cxx b/Framework/Core/test/test_Kernels.cxx index 68cee1136534c..75bef63c6ad8e 100644 --- a/Framework/Core/test/test_Kernels.cxx +++ b/Framework/Core/test/test_Kernels.cxx @@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE(TestSlicingFramework) std::vector offsets; std::vector slices; - auto status = sliceByColumn("x", table, 12, &slices, nullptr, &offsets); + auto status = sliceByColumn("x", table, 12, &slices, &offsets); BOOST_REQUIRE(status.ok()); BOOST_REQUIRE_EQUAL(slices.size(), 12); std::array sizes{0, 4, 1, 0, 1, 2, 0, 0, 0, 0, 0, 0};