-
Notifications
You must be signed in to change notification settings - Fork 4k
ARROW-8199: [C++] Add support for multi-column sort indices on Table #8612
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
85bed4d
50dd97f
121b51b
e4e5d68
b5d234f
8c9c7c0
96d0e52
7b4b7ae
67a3d40
3bce727
9289fdc
3285131
4a76c39
efa3ee6
2c4d236
0d097db
493313a
628e3ec
629e291
ff81915
8170331
ac0f654
f45664e
3631766
eca3902
af9c880
aa368eb
8deb74c
e9c03d2
f84f748
a7eb62f
e9b481c
bbda240
e3ed89a
5ca9e8d
fecd557
8e5026a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,7 +16,7 @@ | |
| # under the License. | ||
|
|
||
| aws-sdk-cpp | ||
| benchmark=1.4.1 | ||
| benchmark=1.5.2 | ||
| boost-cpp>=1.68.0 | ||
| brotli | ||
| bzip2 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -58,6 +58,34 @@ struct ARROW_EXPORT TakeOptions : public FunctionOptions { | |
| static TakeOptions Defaults() { return BoundsCheck(); } | ||
| }; | ||
|
|
||
| enum class SortOrder { | ||
| Ascending, | ||
| Descending, | ||
| }; | ||
|
|
||
| /// \brief One sort key for PartitionNthIndices (TODO) and SortIndices | ||
| struct ARROW_EXPORT SortKey { | ||
| explicit SortKey(std::string name, SortOrder order = SortOrder::Ascending) | ||
| : name(name), order(order) {} | ||
|
|
||
| /// The name of the sort column. | ||
| std::string name; | ||
| /// How to order by this sort key. | ||
| SortOrder order; | ||
| }; | ||
|
|
||
| struct ARROW_EXPORT ArraySortOptions : public FunctionOptions { | ||
| explicit ArraySortOptions(SortOrder order = SortOrder::Ascending) : order(order) {} | ||
|
|
||
| SortOrder order; | ||
| }; | ||
|
|
||
| struct ARROW_EXPORT SortOptions : public FunctionOptions { | ||
|
||
| explicit SortOptions(std::vector<SortKey> sort_keys = {}) : sort_keys(sort_keys) {} | ||
|
|
||
| std::vector<SortKey> sort_keys; | ||
| }; | ||
|
|
||
| /// \brief Partitioning options for NthToIndices | ||
| struct ARROW_EXPORT PartitionNthOptions : public FunctionOptions { | ||
| explicit PartitionNthOptions(int64_t pivot) : pivot(pivot) {} | ||
|
|
@@ -152,21 +180,71 @@ ARROW_EXPORT | |
| Result<std::shared_ptr<Array>> NthToIndices(const Array& values, int64_t n, | ||
| ExecContext* ctx = NULLPTR); | ||
|
|
||
| /// \brief Returns the indices that would sort an array. | ||
| /// \brief Returns the indices that would sort an array in the | ||
| /// specified order. | ||
| /// | ||
| /// Perform an indirect sort of array. The output array will contain | ||
| /// indices that would sort an array, which would be the same length | ||
| /// as input. Nulls will be stably partitioned to the end of the output. | ||
| /// as input. Nulls will be stably partitioned to the end of the output | ||
| /// regardless of order. | ||
| /// | ||
| /// For example given values = [null, 1, 3.3, null, 2, 5.3], the output | ||
| /// will be [1, 4, 2, 5, 0, 3] | ||
| /// For example given array = [null, 1, 3.3, null, 2, 5.3] and order | ||
| /// = SortOrder::Descending, the output will be [5, 2, 4, 1, 0, | ||
| /// 3]. | ||
| /// | ||
| /// \param[in] values array to sort | ||
| /// \param[in] array array to sort | ||
| /// \param[in] order ascending or descending | ||
| /// \param[in] ctx the function execution context, optional | ||
| /// \return indices that would sort an array | ||
| ARROW_EXPORT | ||
| Result<std::shared_ptr<Array>> SortToIndices(const Array& values, | ||
| ExecContext* ctx = NULLPTR); | ||
| Result<std::shared_ptr<Array>> SortIndices(const Array& array, | ||
| SortOrder order = SortOrder::Ascending, | ||
| ExecContext* ctx = NULLPTR); | ||
|
|
||
| /// \brief Returns the indices that would sort a chunked array in the | ||
| /// specified order. | ||
| /// | ||
| /// Perform an indirect sort of chunked array. The output array will | ||
| /// contain indices that would sort a chunked array, which would be | ||
| /// the same length as input. Nulls will be stably partitioned to the | ||
| /// end of the output regardless of order. | ||
| /// | ||
| /// For example given chunked_array = [[null, 1], [3.3], [null, 2, | ||
| /// 5.3]] and order = SortOrder::Descending, the output will be [5, 2, | ||
| /// 4, 1, 0, 3]. | ||
| /// | ||
| /// \param[in] chunked_array chunked array to sort | ||
| /// \param[in] order ascending or descending | ||
| /// \param[in] ctx the function execution context, optional | ||
| /// \return indices that would sort a chunked array | ||
| ARROW_EXPORT | ||
| Result<std::shared_ptr<Array>> SortIndices(const ChunkedArray& chunked_array, | ||
| SortOrder order = SortOrder::Ascending, | ||
| ExecContext* ctx = NULLPTR); | ||
|
|
||
| /// \brief Returns the indices that would sort a table in the | ||
| /// specified order. | ||
| /// | ||
| /// Perform an indirect sort of table. The output array will contain | ||
| /// indices that would sort a table, which would be the same length as | ||
| /// input. Nulls will be stably partitioned to the end of the output | ||
| /// regardless of order. | ||
| /// | ||
| /// For example given table = { | ||
| /// "column1": [[null, 1], [ 3, null, 2, 1]], | ||
| /// "column2": [[ 5], [3, null, null, 5, 5]], | ||
| /// } and options = { | ||
| /// {"column1", SortOrder::Ascending}, | ||
| /// {"column2", SortOrder::Descending}, | ||
| /// }, the output will be [5, 1, 4, 2, 0, 3]. | ||
| /// | ||
| /// \param[in] table table to sort | ||
| /// \param[in] options options | ||
| /// \param[in] ctx the function execution context, optional | ||
| /// \return indices that would sort a table | ||
| ARROW_EXPORT | ||
| Result<std::shared_ptr<Array>> SortIndices(const Table& table, const SortOptions& options, | ||
| ExecContext* ctx = NULLPTR); | ||
|
|
||
| /// \brief Compute unique elements from an array-like object | ||
| /// | ||
|
|
@@ -254,5 +332,10 @@ Result<std::shared_ptr<Table>> Take(const Table& table, const ChunkedArray& indi | |
| const TakeOptions& options = TakeOptions::Defaults(), | ||
| ExecContext* context = NULLPTR); | ||
|
|
||
| ARROW_DEPRECATED("Deprecated in 3.0.0. Use SortIndices()") | ||
| ARROW_EXPORT | ||
| Result<std::shared_ptr<Array>> SortToIndices(const Array& values, | ||
| ExecContext* ctx = NULLPTR); | ||
|
|
||
| } // namespace compute | ||
| } // namespace arrow | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This hunk is for the
SortToIndices() -> SortIndices() rename.