From 2882cb4fac72b2d1217e09ad51eba62043e54b2f Mon Sep 17 00:00:00 2001 From: Alessandro Molina Date: Wed, 10 Nov 2021 13:04:25 +0100 Subject: [PATCH 01/24] ARROW-14656: [Python] sort_by helper for StructArray --- python/pyarrow/array.pxi | 19 +++++++++++++++++++ python/pyarrow/tests/test_array.py | 14 ++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index de0d3a74dfb..85dde338419 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2370,6 +2370,25 @@ cdef class StructArray(Array): result.validate() return result + def sort_by(self, fieldname, order="ascending"): + """ + Sort the StructArray by one of its fields. + + Parameters + ---------- + fieldname : str + The name of the field used to sort the StructArray. + order : "ascending" or "descending" + The order of the sorting. + + Returns + ------- + result : StructArray + """ + field = self.field(fieldname) + indices = _pc().array_sort_indices(field, order=order) + return self.take(indices) + cdef class ExtensionArray(Array): """ diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 6b87a0ba9b0..c4b5c8fb389 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -724,6 +724,20 @@ def test_struct_array_from_chunked(): pa.StructArray.from_arrays([chunked_arr], ["foo"]) +def test_struct_array_sort(): + arr = pa.StructArray.from_arrays([ + pa.array([5, 7, 35], type=pa.int64()), + pa.array(["foo", "bar", "foobar"]) + ], names=["a", "b"]) + + sorted_arr = arr.sort_by("a", "descending") + assert sorted_arr.to_pylist() == [ + {"a": 35, "b": "foobar"}, + {"a": 7, "b": "bar"}, + {"a": 5, "b": "foo"}, + ] + + def test_dictionary_from_numpy(): indices = np.repeat([0, 1, 2], 2) dictionary = np.array(['foo', 'bar', 'baz'], dtype=object) From 291c078c01652914dbd6fd91386d91a0ffece392 Mon Sep 17 00:00:00 2001 From: Alessandro Molina Date: Fri, 12 Nov 2021 17:18:11 +0100 Subject: 
[PATCH 02/24] Incomplete steps forward --- .../compute/kernels/vector_array_sort.cc | 8 +++++ python/pyarrow/array.pxi | 33 +++++++++++++++---- python/pyarrow/table.pxi | 16 +++++++++ python/pyarrow/tests/test_array.py | 19 ++++++++++- 4 files changed, 69 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_array_sort.cc b/cpp/src/arrow/compute/kernels/vector_array_sort.cc index 6d809102769..8ee2a86ce7e 100644 --- a/cpp/src/arrow/compute/kernels/vector_array_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_array_sort.cc @@ -502,6 +502,13 @@ const ArraySortOptions* GetDefaultArraySortOptions() { return &kDefaultArraySortOptions; } +template