Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2882cb4
ARROW-14656: [Python] sort_by helper for StructArray
amol- Nov 10, 2021
291c078
Incomplete steps forward
amol- Nov 12, 2021
bc493ef
use sort_indices for ChunkedArray
amol- Nov 22, 2021
5c6280b
Proof of concept for StructArray
amol- Nov 22, 2021
f5148d0
in progress
amol- Nov 30, 2021
c4305a1
Continue with refactoring
amol- Dec 2, 2021
d638ee6
remove for the moment
amol- Dec 2, 2021
68e54b8
Add unit test
amol- Dec 2, 2021
f8d6478
Change StructArrayCompareSorter to use NestedValuesComparator, add
Jedi18 Dec 8, 2021
0fb14a4
chunked array sort test runs
Jedi18 Dec 27, 2021
011f47b
changes to chunked array sort and add test and sort function for Reco…
Jedi18 Dec 30, 2021
03539c7
add small table sorting test, minor changes to RecordBatch sort
Jedi18 Dec 30, 2021
1ef9769
clang format fix
Jedi18 Jan 3, 2022
7806ab1
Merge branch 'master' into ARROW-14656
Jedi18 Jan 9, 2022
69d27fe
modify chunked array of struct sorting
Jedi18 Jan 9, 2022
f52640f
Merge branch 'master' into ARROW-14656
Jedi18 Jan 22, 2022
109c2f4
remove internal tests for nestedvaluescomparator
Jedi18 Jan 23, 2022
751cb88
update recordbatch to use nestedvaluescomparator
Jedi18 Jan 24, 2022
78eede2
Merge branch 'master' into ARROW-14656
Jedi18 Jan 24, 2022
9568759
radix record batch sorter working with nestedvaluescomparator
Jedi18 Jan 25, 2022
7a8f222
use nestedvaluescomparator in MultipleKeyRecordSorter and add small test
Jedi18 Jan 25, 2022
96a49c1
use NestedValuesComparator in MultipleKeyRecordBatchSorter
Jedi18 Jan 29, 2022
5e93e03
use nestedvaluescomparator in chunked array struct array sorting
Jedi18 Jan 29, 2022
b9b57e9
add support for nulltype array in nestedvaluescomparator
Jedi18 Jan 30, 2022
8c66028
fix selectkth record batch bug
Jedi18 Feb 1, 2022
df33cd1
Merge branch 'master' into ARROW-14656
Jedi18 Feb 3, 2022
db936ad
handle return status of prepare call
Jedi18 Feb 3, 2022
87c3041
add visit overloads for temporal types
Jedi18 Feb 3, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions cpp/src/arrow/compute/kernels/chunked_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,19 @@ struct ResolvedChunk {
LogicalValueType Value() const { return V::LogicalValue(array->GetView(index)); }
};

// ResolvedChunk specialization for StructArray.
//
// Only a null lookup is provided here; unlike the generic ResolvedChunk there
// is no Value() accessor.
template <>
struct ResolvedChunk<StructArray> {
  // The target struct array in the chunked array.
  const StructArray* array;
  // The index of the element in the target struct array.
  const int64_t index;

  ResolvedChunk(const StructArray* array, int64_t index) : array(array), index(index) {}

  // Whether the struct slot itself is null. This consults the struct array's
  // own validity rather than that of its first child: a child may be null
  // while the struct slot is valid (and vice versa), and a struct with no
  // fields has no first child to ask.
  bool IsNull() const { return array->IsNull(index); }
};

// ResolvedChunk specialization for untyped arrays when all that is needed is null lookup
template <>
struct ResolvedChunk<Array> {
Expand Down
56 changes: 56 additions & 0 deletions cpp/src/arrow/compute/kernels/vector_array_sort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,49 @@ class ArrayCompareSorter {
}
};

// Comparison-based sorter for struct arrays.
//
// Non-null rows are ordered field by field, in field order: the first field
// whose comparison is not "equal" decides the ordering of two rows. The
// per-field comparison is delegated to NestedValuesComparator.
template <typename ArrowType>
class StructArrayCompareSorter {
  using ArrayType = typename TypeTraits<ArrowType>::ArrayType;

 public:
  // Sorts the indices in [indices_begin, indices_end) that refer to `array`.
  //
  // `offset` is used when this is called on a chunk of a chunked array.
  // `options` supplies the sort order and the null placement.
  //
  // Returns the null partition computed by PartitionNulls, whose non-null
  // range has been stably sorted. If preparing the comparator fails, a
  // value-initialized NullPartitionResult is returned and the indices are
  // left untouched.
  NullPartitionResult operator()(uint64_t* indices_begin, uint64_t* indices_end,
                                 const Array& array, int64_t offset,
                                 const ArraySortOptions& options) {
    const auto& values = checked_cast<const ArrayType&>(array);
    nested_value_comparator_ = std::make_shared<NestedValuesComparator>();

    if (!nested_value_comparator_->Prepare(values).ok()) {
      // TODO: Improve error handling. The failure cannot be reported through
      // the current return type, so callers only see an empty result.
      return NullPartitionResult();
    }

    const auto p = PartitionNulls<ArrayType, StablePartitioner>(
        indices_begin, indices_end, values, offset, options.null_placement);

    const bool asc_order = options.order == SortOrder::Ascending;
    // The number of fields does not change during the sort; compute it once
    // instead of on every comparison.
    const ArrayVector::size_type num_fields = values.fields().size();

    std::stable_sort(
        p.non_nulls_begin, p.non_nulls_end,
        [offset, num_fields, asc_order, &values, this](uint64_t left, uint64_t right) {
          // Descending order is obtained by swapping the operands, which keeps
          // the predicate a strict weak ordering.
          const uint64_t lhs = asc_order ? left : right;
          const uint64_t rhs = asc_order ? right : left;
          for (ArrayVector::size_type fieldidx = 0; fieldidx < num_fields; ++fieldidx) {
            const int result =
                nested_value_comparator_->Compare(values, fieldidx, offset, lhs, rhs);
            // Only the sign matters, so any negative/positive result works.
            if (result < 0) {
              return true;
            }
            if (result > 0) {
              return false;
            }
          }
          // All fields compare equal: neither row precedes the other.
          return false;
        });
    return p;
  }

  // Comparator used by the most recent call to operator(); it is re-created
  // on every call.
  std::shared_ptr<NestedValuesComparator> nested_value_comparator_;
};

template <typename ArrowType>
class ArrayCountSorter {
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
Expand Down Expand Up @@ -410,6 +453,11 @@ struct ArraySorter<
ArrayCompareSorter<Type> impl;
};

// ArraySorter specialization selected when `Type` is a struct type: sorting
// is carried out by StructArrayCompareSorter.
template <typename Type>
struct ArraySorter<Type, enable_if_t<is_struct_type<Type>::value>> {
// The sorter implementation used for struct arrays.
StructArrayCompareSorter<Type> impl;
};

struct ArraySorterFactory {
ArraySortFunc sorter;

Expand Down Expand Up @@ -511,6 +559,13 @@ const ArraySortOptions* GetDefaultArraySortOptions() {
return &kDefaultArraySortOptions;
}

// Registers the sorting kernel for nested (struct) arrays on `func`.
//
// `base` is taken by value and customized with the struct input signature and
// the matching exec function before being added; the kernel outputs uint64
// indices.
template <template <typename...> class ExecTemplate>
void AddArraySortingNestedKernels(VectorKernel base, VectorFunction* func) {
  const InputType struct_input = InputType::Array(Type::STRUCT);
  base.exec = ExecTemplate<UInt64Type, StructType>::Exec;
  base.signature = KernelSignature::Make({struct_input}, uint64());
  DCHECK_OK(func->AddKernel(base));
}

const FunctionDoc array_sort_indices_doc(
"Return the indices that would sort an array",
("This function computes an array of indices that define a stable sort\n"
Expand Down Expand Up @@ -559,6 +614,7 @@ void RegisterVectorArraySort(FunctionRegistry* registry) {
GetDefaultArraySortOptions());
base.init = ArraySortIndicesState::Init;
AddArraySortingKernels<ArraySortIndices>(base, array_sort_indices.get());
AddArraySortingNestedKernels<ArraySortIndices>(base, array_sort_indices.get());
DCHECK_OK(registry->AddFunction(std::move(array_sort_indices)));

// partition_nth_indices has a parameter so needs its init function
Expand Down
Loading