From 0ec4c5a17bd60e163eb71a0f00d9c73c525c3b8c Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman
Date: Thu, 7 Jan 2021 16:27:27 -0500
Subject: [PATCH 1/2] ARROW-11166: [Python] Add binding for ProjectOptions

---
 python/pyarrow/_compute.pyx          | 33 +++++++++++++++++++++++++++++++++
 python/pyarrow/compute.py            |  1 +
 python/pyarrow/includes/libarrow.pxd |  5 +++++
 3 files changed, 39 insertions(+)

diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx
index c975901c3cd..3772ab24c91 100644
--- a/python/pyarrow/_compute.pyx
+++ b/python/pyarrow/_compute.pyx
@@ -665,6 +665,39 @@ class PartitionNthOptions(_PartitionNthOptions):
         self._set_options(pivot)
 
 
+cdef class _ProjectOptions(FunctionOptions):
+    # Holds the C++ ProjectOptions instance returned by get_options().
+    cdef:
+        unique_ptr[CProjectOptions] project_options
+
+    cdef const CFunctionOptions* get_options(self) except NULL:
+        return self.project_options.get()
+
+    def _set_options(self, field_names):
+        cdef:
+            vector[c_string] c_field_names
+        # Convert each name with tobytes() and hand the converted vector
+        # (not the original Python object) to the C++ constructor.
+        for n in field_names:
+            c_field_names.push_back(tobytes(n))
+        self.project_options.reset(new CProjectOptions(c_field_names))
+
+
+class ProjectOptions(_ProjectOptions):
+    """
+    Options for the "project" compute function.
+
+    Parameters
+    ----------
+    field_names : iterable
+        Names for the fields of the output struct; each entry is
+        converted with tobytes().
+    """
+
+    def __init__(self, field_names):
+        self._set_options(field_names)
+
+
 cdef class _MinMaxOptions(FunctionOptions):
     cdef:
         CMinMaxOptions min_max_options
diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py
index 5127bbd8f3d..dcd1c90a2af 100644
--- a/python/pyarrow/compute.py
+++ b/python/pyarrow/compute.py
@@ -36,6 +36,7 @@
     MinMaxOptions,
     ModeOptions,
     PartitionNthOptions,
+    ProjectOptions,
     SetLookupOptions,
     StrptimeOptions,
     TakeOptions,
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8cc9c960a86..58ff6ce7c54 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -1813,6 +1813,11 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil:
         CPartitionNthOptions(int64_t pivot)
         int64_t pivot
 
+    cdef cppclass CProjectOptions \
+            "arrow::compute::ProjectOptions"(CFunctionOptions):
+        CProjectOptions(vector[c_string] field_names)
+        vector[c_string] field_names
+
     ctypedef enum CSortOrder" arrow::compute::SortOrder":
         CSortOrder_Ascending \
             "arrow::compute::SortOrder::Ascending"

From 5a168c743ed0addb1b7802f29d68b1e0e1146033 Mon Sep 17 00:00:00 2001
From: Benjamin Kietzman
Date: Fri, 8 Jan 2021 10:43:32 -0500
Subject: [PATCH 2/2] add documentation for project

---
 docs/source/cpp/compute.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 472859708f0..158297b1e9f 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -461,6 +461,8 @@ Structural transforms
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
 | list_value_length        | Unary      | List-like                                      | Int32 or Int64      | \(5)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
+| project                  | Varargs    | Any                                            | Struct              | \(6)    |
++--------------------------+------------+------------------------------------------------+---------------------+---------+
 
 * \(1) First input must be an array, second input a scalar of the same type.
   Output is an array of the same type as the inputs, and with the same values
@@ -475,6 +477,11 @@ Structural transforms
 * \(5) Each output element is the length of the corresponding input element
   (null if input is null). Output type is Int32 for List, Int64 for LargeList.
 
+* \(6) The output struct's field types are the types of its arguments. The
+  field names are specified using an instance of :struct:`ProjectOptions`.
+  The output shape will be scalar if all inputs are scalar, otherwise any
+  scalars will be broadcast to arrays.
+
 Conversions
 ~~~~~~~~~~~
 