From 49f9bc1b0d7bbe9285d8a2b799794c164a8ff92a Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 7 Sep 2020 23:30:02 -0400 Subject: [PATCH 01/18] ARROW-9967: [Python] Add compute module documentation --- docs/source/cpp/compute.rst | 5 + docs/source/python/api/compute.rst | 206 +++++++++++++++++++++++++++++ docs/source/python/compute.rst | 54 ++++++++ 3 files changed, 265 insertions(+) create mode 100644 docs/source/python/api/compute.rst create mode 100644 docs/source/python/compute.rst diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 6ef10abf67d..49f9c867e55 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -205,6 +205,11 @@ an ``Invalid`` :class:`Status` when overflow is detected. +--------------------------+------------+--------------------+---------------------+ | subtract_checked | Binary | Numeric | Numeric | +--------------------------+------------+--------------------+---------------------+ +| divide | Binary | Numeric | Numeric | ++--------------------------+------------+--------------------+---------------------+ +| divide_checked | Binary | Numeric | Numeric | ++--------------------------+------------+--------------------+---------------------+ + Comparisons ~~~~~~~~~~~ diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst new file mode 100644 index 00000000000..d8e47862361 --- /dev/null +++ b/docs/source/python/api/compute.rst @@ -0,0 +1,206 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. 
Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _api.compute: +.. currentmodule:: pyarrow + +Compute Functions +================= + +Aggregations +------------ + +.. autosummary:: + :toctree: ../generated/ + + count + mean + min_max + sum + mode + +Arithmetic Functions +-------------------- + +By default these functions do not detect overflow. Each function is also +available in an overflow-checking variant, suffixed `_checked`, which +throws an `ArrowInvalid` exception when overflow is detected. + +.. autosummary:: + :toctree: ../generated/ + + add + add_checked + subtract + subtract_checked + multiply + multiply_checked + divide + divide_checked + +Comparisons +----------- + +These functions expect two inputs of the same type. If one of the inputs is `null` +they return `null`. + +.. autosummary:: + :toctree: ../generated/ + + equal + not_equal + greater + greater_equal + less + less_equal + + +Logical Functions +----------- + +These functions normally emit a null when one of the inputs is null. However, Kleene +logic variants are provided (suffixed `_kleene`). See User Guide for details. + +.. autosummary:: + :toctree: ../generated/ + + and_ + and_kleene + invert + or_ + or_kleene + xor + +String Predicates +----------------- + +In these functions an empty string emits false in the output. For ASCII +variants (prefixed `ascii_`) a string element with non-ASCII characters +emits false in the output. + +The first set of functions emit true if the input contains only +characters of a given class. + +.. 
autosummary:: + :toctree: ../generated/ + + ascii_is_alnum + ascii_is_alpha + ascii_is_decimal + ascii_is_lower + ascii_is_printable + ascii_is_space + ascii_is_upper + utf8_is_alnum + utf8_is_alpha + utf8_is_decimal + utf8_is_digit + utf8_is_lower + utf8_is_numeric + utf8_is_printable + utf8_is_space + utf8_is_upper + +The second set of functions also consider the order of characters +in the string element. + +.. autosummary:: + :toctree: ../generated/ + + ascii_is_title + utf8_is_title + +The third set of functions examines string elements on +a byte-by-byte basis. + +.. autosummary:: + :toctree: ../generated/ + + string_is_ascii + +String Transforms +----------------- + +.. autosummary:: + :toctree: ../generated/ + + ascii_lower + ascii_upper + binary_length + utf8_lower + utf8_upper + +Containment tests +----------------- + +.. autosummary:: + :toctree: ../generated/ + + match_substring + index_in + is_in + + +Conversions +----------- + +.. autosummary:: + :toctree: ../generated/ + + cast + strptime + +Selections +---------- + +.. autosummary:: + :toctree: ../generated/ + + filter + take + +Associative transforms +---------------------- + +.. autosummary:: + :toctree: ../generated/ + + dictionary_encode + unique + value_counts + +Sorts and partitions +-------------------- + +.. autosummary:: + :toctree: ../generated/ + + partition_nth_indices + sort_indices + +Structural Transforms +--------------------- + +.. autosummary:: + :toctree: ../generated/ + + fill_null + is_null + is_valid + list_value_length + list_flatten + list_parent_indices \ No newline at end of file diff --git a/docs/source/python/compute.rst b/docs/source/python/compute.rst new file mode 100644 index 00000000000..53bb23c3329 --- /dev/null +++ b/docs/source/python/compute.rst @@ -0,0 +1,54 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. 
regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. currentmodule:: pyarrow.compute +.. _compute: + +================= +Compute Functions +================= + +Arrow supports logical compute operations over inputs of possibly +varying types. Many compute functions support both array (chunked or not) +and scalar inputs, but some will mandate either. For example, +the ``fill_null`` function requires its second input to be a scalar, +while ``sort_indices`` requires its first and only input to +be an array. + +Below are a few simple examples: + + >>> import pyarrow as pa + >>> import pyarrow.compute as pc + >>> a = pa.array([1, 1, 2, 3]) + >>> pc.sum(a) + + >>> b = pa.array([4, 5, 8, 2]) + + [ + 4, + 5, + 16, + 6 + ] + >>> x, y = pa.scalar(7.8), pa.scalar(9.3) + >>> pc.multiply(x, y) + + + +.. seealso:: + A comprehensive description of compute functions can be found in the + C++ implementation docs. 
From ddd6dada7f73b90cb000600ab7d86b6e44fe7339 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 14 Sep 2020 16:35:22 -0400 Subject: [PATCH 02/18] feedback --- docs/source/cpp/compute.rst | 4 ++-- docs/source/python/api/compute.rst | 12 ++++++------ docs/source/python/compute.rst | 15 ++++++++------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 49f9c867e55..c947048d375 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -31,8 +31,8 @@ The generic Compute API Functions and function registry ------------------------------- -Functions represent logical compute operations over inputs of possibly -varying types. Internally, a function is implemented by one or several +Functions represent compute operations over inputs of possibly varying +types. Internally, a function is implemented by one or several "kernels", depending on the concrete input types (for example, a function adding values from two inputs can have different kernels depending on whether the inputs are integral or floating-point). diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index d8e47862361..b3e9192029b 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -37,8 +37,8 @@ Arithmetic Functions -------------------- By default these functions do not detect overflow. Each function is also -available in an overflow-checking variant, suffixed `_checked`, which -throws an `ArrowInvalid` exception when overflow is detected. +available in an overflow-checking variant, suffixed ``_checked``, which +throws an ``ArrowInvalid`` exception when overflow is detected. .. autosummary:: :toctree: ../generated/ @@ -56,7 +56,7 @@ Comparisons ----------- These functions expect two inputs of the same type. If one of the inputs is `null` -they return `null`. +they return ``null``. .. 
autosummary:: :toctree: ../generated/ @@ -73,7 +73,7 @@ Logical Functions ----------- These functions normally emit a null when one of the inputs is null. However, Kleene -logic variants are provided (suffixed `_kleene`). See User Guide for details. +logic variants are provided (suffixed ``_kleene``). See User Guide for details. .. autosummary:: :toctree: ../generated/ @@ -89,7 +89,7 @@ String Predicates ----------------- In these functions an empty string emits false in the output. For ASCII -variants (prefixed `ascii_`) a string element with non-ASCII characters +variants (prefixed ``ascii_``) a string element with non-ASCII characters emits false in the output. The first set of functions emit true if the input contains only @@ -140,7 +140,6 @@ String Transforms ascii_lower ascii_upper - binary_length utf8_lower utf8_upper @@ -201,6 +200,7 @@ Structural Transforms fill_null is_null is_valid + binary_length list_value_length list_flatten list_parent_indices \ No newline at end of file diff --git a/docs/source/python/compute.rst b/docs/source/python/compute.rst index 53bb23c3329..73ee2c2b3de 100644 --- a/docs/source/python/compute.rst +++ b/docs/source/python/compute.rst @@ -36,14 +36,15 @@ Below are a few simple examples: >>> a = pa.array([1, 1, 2, 3]) >>> pc.sum(a) - >>> b = pa.array([4, 5, 8, 2]) - + >>> b = pa.array([4, 1, 2, 8]) + >>> pc.equal(a, b) + [ - 4, - 5, - 16, - 6 - ] + false, + true, + true, + false + ] >>> x, y = pa.scalar(7.8), pa.scalar(9.3) >>> pc.multiply(x, y) From 15edf686acd63949486266ffee326a9819d7fae9 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 14 Sep 2020 22:38:39 -0400 Subject: [PATCH 03/18] add explicit wrappers for and_ and or_ --- python/pyarrow/compute.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index 2204471b0ee..0f907225ff5 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -293,6 +293,44 @@ def 
mode(array): """ return call_function("mode", [array]) +def and_(x1, x2): + """ + Compute the truth value of two boolean (chunked) arrays x1 AND x2 + element-wise. x1 and x2 must have the same length. The truth value + of two null elements is null. See `and_kleene` for an implementation + with alternative null handling. + + Parameters + ---------- + x1 : pyarrow.BooleanArray or pyarrow.ChunkedArray + x1 : pyarrow.BooleanArray or pyarrow.ChunkedArray + + Returns + ------- + result : pyarrow.BooleanArray or pyarrow.ChunkedArray + + """ + return call_function("and", [x1, x2]) + +def or_(x1, x2): + """ + Compute the truth value of two boolean (chunked) arrays x1 OR x2 + element-wise. x1 and x2 must have the same length. The truth value + of two null elements is null. See `or_kleene` for an implementation + with alternative null handling. + + Parameters + ---------- + x1 : pyarrow.BooleanArray or pyarrow.ChunkedArray + x1 : pyarrow.BooleanArray or pyarrow.ChunkedArray + + Returns + ------- + result : pyarrow.BooleanArray or pyarrow.ChunkedArray + + """ + return call_function("or", [x1, x2]) + def filter(data, mask, null_selection_behavior='drop'): """ From 6cf1a0e15c273f3382f283f721fe4e8fcc734b52 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 14 Sep 2020 23:51:08 -0400 Subject: [PATCH 04/18] linting --- python/pyarrow/compute.py | 4 +++- python/pyarrow/tests/test_compute.py | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index 0f907225ff5..008d3b8a16a 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -293,9 +293,10 @@ def mode(array): """ return call_function("mode", [array]) + def and_(x1, x2): """ - Compute the truth value of two boolean (chunked) arrays x1 AND x2 + Compute the truth value of two boolean (chunked) arrays x1 AND x2 element-wise. x1 and x2 must have the same length. The truth value of two null elements is null. 
See `and_kleene` for an implementation with alternative null handling. @@ -312,6 +313,7 @@ def and_(x1, x2): """ return call_function("and", [x1, x2]) + def or_(x1, x2): """ Compute the truth value of two boolean (chunked) arrays x1 OR x2 diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 8b0859ccf39..a320bc190e9 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -18,6 +18,7 @@ from functools import lru_cache import pickle import pytest +import sys import textwrap import numpy as np @@ -822,3 +823,29 @@ def test_fill_null_chunked_array(arrow_type): result = arr.fill_null(pa.scalar(5, type='int8')) assert result.equals(expected) + + +def test_logical(): + + a = pa.array([True, False, False, None]) + b = pa.array([True, True, False, True]) + + assert pc.and_(a, b) == pa.array([True, False, False, None]) + assert pc.and_kleene(a, b) == pa.array([True, False, False, None]) + + assert pc.or_(a, b) == pa.array([True, True, False, None]) + assert pc.and_kleene(a, b) == pa.array([True, False, False, None]) + + assert pc.xor(a, b) == pa.array([False, True, False, None]) + + assert pc.invert(a) == pa.array([False, True, True, None]) + + +def test_cast(): + + a = pa.array([sys.maxsize], type='int64') + + with pytest.raises(pa.ArrowInvalid): + pc.cast(a, 'int32') + + assert pc.cast(a, 'int32', safe=False) == pa.array([-1], type='int32') From 0814bbd5786efdf2a4e76f0dff6b72c3f744058d Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 15 Sep 2020 00:08:04 -0400 Subject: [PATCH 05/18] test some wrappers --- python/pyarrow/tests/test_compute.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index a320bc190e9..bd6671e296c 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -15,6 +15,7 @@ # specific language governing permissions and 
limitations # under the License. +from datetime import datetime from functools import lru_cache import pickle import pytest @@ -837,15 +838,23 @@ def test_logical(): assert pc.and_kleene(a, b) == pa.array([True, False, False, None]) assert pc.xor(a, b) == pa.array([False, True, False, None]) - + assert pc.invert(a) == pa.array([False, True, True, None]) - def test_cast(): - a = pa.array([sys.maxsize], type='int64') + arr = pa.array([sys.maxsize], type='int64') with pytest.raises(pa.ArrowInvalid): - pc.cast(a, 'int32') + pc.cast(arr, 'int32') + + assert pc.cast(arr, 'int32', safe=False) == pa.array([-1], type='int32') + + arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)]) + expected = pa.array([1262304000000, 1420070400000], type='timestamp[ms]') + assert pc.cast(arr, 'timestamp[ms]') == expected + +def test_sort_partition(): - assert pc.cast(a, 'int32', safe=False) == pa.array([-1], type='int32') + arr = pa.array([100, 99, 150, 200, 1]) + assert pc.sort_indices(arr) == pa.array([4, 1, 0, 2, 3]) From b7af6d9913d1bb2eb3a5759dac364a907928a951 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 15 Sep 2020 00:59:56 -0400 Subject: [PATCH 06/18] more linting --- python/pyarrow/tests/test_compute.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index bd6671e296c..3c61b663a6b 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -838,9 +838,10 @@ def test_logical(): assert pc.and_kleene(a, b) == pa.array([True, False, False, None]) assert pc.xor(a, b) == pa.array([False, True, False, None]) - + assert pc.invert(a) == pa.array([False, True, True, None]) + def test_cast(): arr = pa.array([sys.maxsize], type='int64') @@ -851,9 +852,10 @@ def test_cast(): assert pc.cast(arr, 'int32', safe=False) == pa.array([-1], type='int32') arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)]) - expected = pa.array([1262304000000, 
1420070400000], type='timestamp[ms]') + expected = pa.array([1262304000000, 1420070400000], type='timestamp[ms]') assert pc.cast(arr, 'timestamp[ms]') == expected + def test_sort_partition(): arr = pa.array([100, 99, 150, 200, 1]) From f72f100be63149e618c36baaa2895d61d4ec04ad Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 16 Sep 2020 12:47:02 -0400 Subject: [PATCH 07/18] feedback --- docs/source/cpp/compute.rst | 5 ---- python/pyarrow/compute.py | 45 ++++------------------------ python/pyarrow/tests/test_compute.py | 6 ---- 3 files changed, 5 insertions(+), 51 deletions(-) diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index c947048d375..62b413b40dd 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -205,11 +205,6 @@ an ``Invalid`` :class:`Status` when overflow is detected. +--------------------------+------------+--------------------+---------------------+ | subtract_checked | Binary | Numeric | Numeric | +--------------------------+------------+--------------------+---------------------+ -| divide | Binary | Numeric | Numeric | -+--------------------------+------------+--------------------+---------------------+ -| divide_checked | Binary | Numeric | Numeric | -+--------------------------+------------+--------------------+---------------------+ - Comparisons ~~~~~~~~~~~ diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index 008d3b8a16a..3560369ac2f 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +import sys from pyarrow._compute import ( # noqa Function, @@ -294,46 +295,6 @@ def mode(array): return call_function("mode", [array]) -def and_(x1, x2): - """ - Compute the truth value of two boolean (chunked) arrays x1 AND x2 - element-wise. x1 and x2 must have the same length. The truth value - of two null elements is null. 
See `and_kleene` for an implementation - with alternative null handling. - - Parameters - ---------- - x1 : pyarrow.BooleanArray or pyarrow.ChunkedArray - x1 : pyarrow.BooleanArray or pyarrow.ChunkedArray - - Returns - ------- - result : pyarrow.BooleanArray or pyarrow.ChunkedArray - - """ - return call_function("and", [x1, x2]) - - -def or_(x1, x2): - """ - Compute the truth value of two boolean (chunked) arrays x1 OR x2 - element-wise. x1 and x2 must have the same length. The truth value - of two null elements is null. See `or_kleene` for an implementation - with alternative null handling. - - Parameters - ---------- - x1 : pyarrow.BooleanArray or pyarrow.ChunkedArray - x1 : pyarrow.BooleanArray or pyarrow.ChunkedArray - - Returns - ------- - result : pyarrow.BooleanArray or pyarrow.ChunkedArray - - """ - return call_function("or", [x1, x2]) - - def filter(data, mask, null_selection_behavior='drop'): """ Select values (or records) from array- or table-like data given boolean @@ -457,3 +418,7 @@ def fill_null(values, fill_value): fill_value = pa.scalar(fill_value.as_py(), type=values.type) return call_function("fill_null", [values, fill_value]) + + +and_ = getattr(sys.modules[__name__], 'and') +or_ = getattr(sys.modules[__name__], 'or') diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 3c61b663a6b..89b43dcaeb3 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -854,9 +854,3 @@ def test_cast(): arr = pa.array([datetime(2010, 1, 1), datetime(2015, 1, 1)]) expected = pa.array([1262304000000, 1420070400000], type='timestamp[ms]') assert pc.cast(arr, 'timestamp[ms]') == expected - - -def test_sort_partition(): - - arr = pa.array([100, 99, 150, 200, 1]) - assert pc.sort_indices(arr) == pa.array([4, 1, 0, 2, 3]) From 4d6a7d931d4cc3ba7d06bf73f4bc76e00bc64b14 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 17 Sep 2020 02:17:05 -0400 Subject: [PATCH 08/18] expose PartitionNthOptions 
--- cpp/src/arrow/compute/api_vector.h | 2 +- python/pyarrow/_compute.pyx | 9 +++++++++ python/pyarrow/includes/libarrow.pxd | 5 +++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index de36202f019..2c77e8ee155 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -59,7 +59,7 @@ struct ARROW_EXPORT TakeOptions : public FunctionOptions { }; /// \brief Partitioning options for NthToIndices -struct PartitionNthOptions : public FunctionOptions { +struct ARROW_EXPORT PartitionNthOptions : public FunctionOptions { explicit PartitionNthOptions(int64_t pivot) : pivot(pivot) {} /// The index into the equivalent sorted array of the partition pivot element. diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 6fbe1581f6a..a19e1499851 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -592,6 +592,15 @@ cdef class TakeOptions(FunctionOptions): cdef const CFunctionOptions* get_options(self) except NULL: return &self.take_options +cdef class PartitionNthOptions(FunctionOptions): + cdef: + unique_ptr[CPartitionNthOptions] partition_nth_options + + def __init__(self, pivot): + self.partition_nth_options.reset(new CPartitionNthOptions(pivot)) + + cdef const CFunctionOptions* get_options(self) except NULL: + return self.partition_nth_options.get() cdef class MinMaxOptions(FunctionOptions): cdef: diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 5d5800eec58..d43e75c9027 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1721,6 +1721,11 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: "arrow::compute::MinMaxOptions"(CFunctionOptions): CMinMaxMode null_handling + cdef cppclass CPartitionNthOptions \ + "arrow::compute::PartitionNthOptions"(CFunctionOptions): + CPartitionNthOptions(pivot) + int64_t 
pivot + enum DatumType" arrow::Datum::type": DatumType_NONE" arrow::Datum::NONE" DatumType_SCALAR" arrow::Datum::SCALAR" From d5b9d12d52f70b30826be6e0d40a20c1755ee5c9 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 17 Sep 2020 10:55:07 -0400 Subject: [PATCH 09/18] alphabetise func names in Python API reference --- docs/source/python/api/compute.rst | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index b3e9192029b..5bae5bee01c 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -30,8 +30,8 @@ Aggregations count mean min_max - sum mode + sum Arithmetic Functions -------------------- @@ -45,12 +45,12 @@ throws an ``ArrowInvalid`` exception when overflow is detected. add add_checked - subtract - subtract_checked - multiply - multiply_checked divide divide_checked + multiply + multiply_checked + subtract + subtract_checked Comparisons ----------- @@ -62,12 +62,11 @@ they return ``null``. :toctree: ../generated/ equal - not_equal greater greater_equal less less_equal - + not_equal Logical Functions ----------- @@ -149,10 +148,9 @@ Containment tests .. autosummary:: :toctree: ../generated/ - match_substring index_in is_in - + match_substring Conversions ----------- @@ -197,10 +195,10 @@ Structural Transforms .. 
autosummary:: :toctree: ../generated/ + binary_length fill_null is_null is_valid - binary_length list_value_length list_flatten list_parent_indices \ No newline at end of file From 0b6ad515ed9c75b3d2db03b53d33dd9340527aa5 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 21 Sep 2020 14:42:51 -0400 Subject: [PATCH 10/18] __init__ -> __cinit__ in PartitionNthOptions --- python/pyarrow/_compute.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index a19e1499851..b173b7453aa 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -596,7 +596,7 @@ cdef class PartitionNthOptions(FunctionOptions): cdef: unique_ptr[CPartitionNthOptions] partition_nth_options - def __init__(self, pivot): + def __cinit__(self, pivot): self.partition_nth_options.reset(new CPartitionNthOptions(pivot)) cdef const CFunctionOptions* get_options(self) except NULL: From ff7b3b24d19be35ecea57deb306035abd1a6e206 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Mon, 21 Sep 2020 15:43:11 -0400 Subject: [PATCH 11/18] explicit typing in PartitionNthOptions --- python/pyarrow/_compute.pyx | 2 +- python/pyarrow/includes/libarrow.pxd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index b173b7453aa..25690feb7d6 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -596,7 +596,7 @@ cdef class PartitionNthOptions(FunctionOptions): cdef: unique_ptr[CPartitionNthOptions] partition_nth_options - def __cinit__(self, pivot): + def __cinit__(self, int64_t pivot): self.partition_nth_options.reset(new CPartitionNthOptions(pivot)) cdef const CFunctionOptions* get_options(self) except NULL: diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index d43e75c9027..317acd9e2ec 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1723,7 +1723,7 @@ 
cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: cdef cppclass CPartitionNthOptions \ "arrow::compute::PartitionNthOptions"(CFunctionOptions): - CPartitionNthOptions(pivot) + CPartitionNthOptions(int64_t pivot) int64_t pivot enum DatumType" arrow::Datum::type": From 359c0591bbf2edc86478d155e3fbc7ed7760356b Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 22 Sep 2020 00:57:48 -0400 Subject: [PATCH 12/18] expose SetLookupOptions --- python/pyarrow/_compute.pyx | 23 +++++++++++++++++++++++ python/pyarrow/compute.py | 5 +++++ python/pyarrow/includes/libarrow.pxd | 6 ++++++ 3 files changed, 34 insertions(+) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 25690feb7d6..f9c2eda92d9 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -17,6 +17,8 @@ # cython: language_level = 3 +from cython.operator cimport dereference as deref + from pyarrow.lib import frombytes, tobytes, ordered_dict from pyarrow.lib cimport * from pyarrow.includes.libarrow cimport * @@ -618,3 +620,24 @@ cdef class MinMaxOptions(FunctionOptions): cdef const CFunctionOptions* get_options(self) except NULL: return &self.min_max_options + +cdef class SetLookupOptions(FunctionOptions): + cdef: + unique_ptr[CSetLookupOptions] set_lookup_options + unique_ptr[CDatum] valset + + def __cinit__(self, *, value_set, c_bool skip_null): + if isinstance(value_set, Array): + self.valset.reset(new CDatum(( value_set).sp_array)) + elif isinstance(value_set, ChunkedArray): + self.valset.reset(new CDatum(( value_set).sp_chunked_array)) + elif isinstance(value_set, Scalar): + self.valset.reset(new CDatum(( value_set).unwrap())) + else: + raise ValueError('"{}" is not a valid value_set'.format(value_set)) + + + self.set_lookup_options.reset(new CSetLookupOptions(deref(self.valset), skip_null)) + + cdef const CFunctionOptions* get_options(self) except NULL: + return self.set_lookup_options.get() diff --git a/python/pyarrow/compute.py 
b/python/pyarrow/compute.py index 3560369ac2f..53aa389368c 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -32,6 +32,8 @@ FilterOptions, MatchSubstringOptions, MinMaxOptions, + PartitionNthOptions, + SetLookupOptions, TakeOptions, # Functions function_registry, @@ -96,8 +98,11 @@ def _decorate_compute_function(wrapper, exposed_name, func, option_class): # (export the option class name from C++ metadata?) 'cast': CastOptions, 'filter': FilterOptions, + 'index_in': SetLookupOptions, + 'is_in': SetLookupOptions, 'match_substring': MatchSubstringOptions, 'min_max': MinMaxOptions, + 'partition_nth_indices': PartitionNthOptions, 'take': TakeOptions, } diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 317acd9e2ec..e1b3d34fa52 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1751,6 +1751,12 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: shared_ptr[CTable] table() shared_ptr[CScalar] scalar() + cdef cppclass CSetLookupOptions \ + "arrow::compute::SetLookupOptions"(CFunctionOptions): + CSetLookupOptions(CDatum value_set, c_bool skip_nulls) + CDatum value_set + c_bool skip_nulls + cdef extern from "arrow/python/api.h" namespace "arrow::py": # Requires GIL From 103a177b663591470828272b74ab5d09bef7d5ae Mon Sep 17 00:00:00 2001 From: arw2019 Date: Tue, 22 Sep 2020 11:53:47 -0400 Subject: [PATCH 13/18] expose StrptimeOptions + linting --- python/pyarrow/_compute.pyx | 34 +++++++++++++++++++++++++--- python/pyarrow/compute.py | 5 ++-- python/pyarrow/includes/libarrow.pxd | 8 +++++-- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index f9c2eda92d9..7e555654922 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -630,14 +630,42 @@ cdef class SetLookupOptions(FunctionOptions): if isinstance(value_set, Array): self.valset.reset(new CDatum(( 
value_set).sp_array)) elif isinstance(value_set, ChunkedArray): - self.valset.reset(new CDatum(( value_set).sp_chunked_array)) + self.valset.reset( + new CDatum(( value_set).sp_chunked_array) + ) elif isinstance(value_set, Scalar): self.valset.reset(new CDatum(( value_set).unwrap())) else: raise ValueError('"{}" is not a valid value_set'.format(value_set)) - - self.set_lookup_options.reset(new CSetLookupOptions(deref(self.valset), skip_null)) + self.set_lookup_options.reset( + new CSetLookupOptions(deref(self.valset), skip_null) + ) cdef const CFunctionOptions* get_options(self) except NULL: return self.set_lookup_options.get() + + +cdef class StrptimeOptions(FunctionOptions): + cdef: + unique_ptr[CStrptimeOptions] strptime_options + TimeUnit time_unit + + def __cinit__(self, format, unit): + if unit == 's': + self.time_unit = TimeUnit_SECOND + elif unit == 'ms': + self.time_unit = TimeUnit_MILLI + elif unit == 'us': + self.time_unit = TimeUnit_MICRO + elif unit == 'ns': + self.time_unit = TimeUnit_NANO + else: + raise ValueError('"{}" is not a valid time unit'.format(unit)) + + self.strptime_options.reset( + new CStrptimeOptions(tobytes(format), self.time_unit) + ) + + cdef const CFunctionOptions* get_options(self) except NULL: + return self.strptime_options.get() diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index 53aa389368c..c5858478c94 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -34,6 +34,7 @@ MinMaxOptions, PartitionNthOptions, SetLookupOptions, + StrptimeOptions, TakeOptions, # Functions function_registry, @@ -94,8 +95,7 @@ def _decorate_compute_function(wrapper, exposed_name, func, option_class): _option_classes = { - # TODO this is not complete - # (export the option class name from C++ metadata?) + # TODO: export the option class name from C++ metadata? 
'cast': CastOptions, 'filter': FilterOptions, 'index_in': SetLookupOptions, @@ -103,6 +103,7 @@ def _decorate_compute_function(wrapper, exposed_name, func, option_class): 'match_substring': MatchSubstringOptions, 'min_max': MinMaxOptions, 'partition_nth_indices': PartitionNthOptions, + 'strptime': StrptimeOptions, 'take': TakeOptions, } diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index e1b3d34fa52..2702a151626 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1710,6 +1710,10 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: " arrow::compute::TakeOptions"(CFunctionOptions): c_bool boundscheck + cdef cppclass CStrptimeOptions \ + "arrow::compute::StrptimeOptions"(CFunctionOptions): + CStrptimeOptions(c_string format, TimeUnit unit) + enum CMinMaxMode \ "arrow::compute::MinMaxOptions::Mode": CMinMaxMode_SKIP \ @@ -1752,9 +1756,9 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: shared_ptr[CScalar] scalar() cdef cppclass CSetLookupOptions \ - "arrow::compute::SetLookupOptions"(CFunctionOptions): + "arrow::compute::SetLookupOptions"(CFunctionOptions): CSetLookupOptions(CDatum value_set, c_bool skip_nulls) - CDatum value_set + CDatum value_set c_bool skip_nulls From 0bcedcf673b37544dc285f9582b1d0807da83ae5 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 7 Oct 2020 12:28:33 -0400 Subject: [PATCH 14/18] add link to C++ compute docs --- docs/source/python/compute.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/python/compute.rst b/docs/source/python/compute.rst index 73ee2c2b3de..77e8da306c8 100644 --- a/docs/source/python/compute.rst +++ b/docs/source/python/compute.rst @@ -51,5 +51,5 @@ Below are a few simple examples: .. seealso:: - A comprehensive description of compute functions can be found in the - C++ implementation docs. + + :ref:`C++ compute functions documentation `. 
From 38346a3b0522ca350891f9f58f9a19b901dff171 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 7 Oct 2020 13:14:04 -0400 Subject: [PATCH 15/18] expose stddev & variance kernels --- docs/source/python/api/compute.rst | 2 ++ python/pyarrow/_compute.pyx | 10 ++++++++++ python/pyarrow/compute.py | 3 +++ python/pyarrow/includes/libarrow.pxd | 5 +++++ 4 files changed, 20 insertions(+) diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 5bae5bee01c..873648361b6 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -31,7 +31,9 @@ Aggregations mean min_max mode + stddev sum + variance Arithmetic Functions -------------------- diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 7e555654922..f0d28b62e01 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -669,3 +669,13 @@ cdef class StrptimeOptions(FunctionOptions): cdef const CFunctionOptions* get_options(self) except NULL: return self.strptime_options.get() + +cdef class VarianceOptions(FunctionOptions): + cdef: + unique_ptr[CVarianceOptions] variance_options + + def __cinit__(self, *, ddof=0): + self.variance_options.reset(new CVarianceOptions(ddof)) + + cdef const CFunctionOptions* get_options(self) except NULL: + return self.variance_options.get() \ No newline at end of file diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index c5858478c94..c60188b513c 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -36,6 +36,7 @@ SetLookupOptions, StrptimeOptions, TakeOptions, + VarianceOptions, # Functions function_registry, call_function, @@ -103,8 +104,10 @@ def _decorate_compute_function(wrapper, exposed_name, func, option_class): 'match_substring': MatchSubstringOptions, 'min_max': MinMaxOptions, 'partition_nth_indices': PartitionNthOptions, + 'stddev': VarianceOptions, 'strptime': StrptimeOptions, 'take': TakeOptions, + 'variance': VarianceOptions, } diff 
--git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 2702a151626..3093cf5630d 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1714,6 +1714,11 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: "arrow::compute::StrptimeOptions"(CFunctionOptions): CStrptimeOptions(c_string format, TimeUnit unit) + cdef cppclass CVarianceOptions \ + "arrow::compute::VarianceOptions"(CFunctionOptions): + CVarianceOptions(int ddof) + int ddof + enum CMinMaxMode \ "arrow::compute::MinMaxOptions::Mode": CMinMaxMode_SKIP \ From 08429f2a9b5e6c28b2d98ef9cf14c968bc206c9b Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 7 Oct 2020 13:18:24 -0400 Subject: [PATCH 16/18] linting --- python/pyarrow/_compute.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index f0d28b62e01..7b0ae9ce880 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -678,4 +678,4 @@ cdef class VarianceOptions(FunctionOptions): self.variance_options.reset(new CVarianceOptions(ddof)) cdef const CFunctionOptions* get_options(self) except NULL: - return self.variance_options.get() \ No newline at end of file + return self.variance_options.get() From 8d81df2b06441332cba16efc01aa54e949bd3170 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 7 Oct 2020 13:32:00 -0400 Subject: [PATCH 17/18] don't use unique_ptr in VarianceOptions --- python/pyarrow/_compute.pyx | 6 +++--- python/pyarrow/includes/libarrow.pxd | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 7b0ae9ce880..ed50ccc3dca 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -672,10 +672,10 @@ cdef class StrptimeOptions(FunctionOptions): cdef class VarianceOptions(FunctionOptions): cdef: - unique_ptr[CVarianceOptions] variance_options + CVarianceOptions 
variance_options def __cinit__(self, *, ddof=0): - self.variance_options.reset(new CVarianceOptions(ddof)) + self.variance_options.ddof = ddof cdef const CFunctionOptions* get_options(self) except NULL: - return self.variance_options.get() + return &self.variance_options diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 3093cf5630d..dee022f5ca7 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -1716,7 +1716,6 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: cdef cppclass CVarianceOptions \ "arrow::compute::VarianceOptions"(CFunctionOptions): - CVarianceOptions(int ddof) int ddof enum CMinMaxMode \ From 73237b09ce41ab1447162ca1802e4a9b8cb2fcd3 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 8 Oct 2020 13:08:55 +0200 Subject: [PATCH 18/18] Improve docs and tests a bit --- docs/source/cpp/compute.rst | 2 ++ docs/source/python/api.rst | 1 + docs/source/python/api/compute.rst | 4 ++-- docs/source/python/compute.rst | 2 +- docs/source/python/index.rst | 1 + python/pyarrow/_compute.pyx | 4 ++++ python/pyarrow/compute.py | 6 ++---- python/pyarrow/tests/test_compute.py | 14 +++++++++----- 8 files changed, 22 insertions(+), 12 deletions(-) diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 62b413b40dd..af2f485058c 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -101,6 +101,8 @@ exact semantics of the function:: :doc:`Compute API reference ` +.. 
_compute-function-list: + Available functions =================== diff --git a/docs/source/python/api.rst b/docs/source/python/api.rst index 5c4d6074d62..12cf4e06802 100644 --- a/docs/source/python/api.rst +++ b/docs/source/python/api.rst @@ -27,6 +27,7 @@ API Reference api/datatypes api/arrays api/memory + api/compute api/files api/tables api/ipc diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 873648361b6..2ec355d66af 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -16,7 +16,7 @@ .. under the License. .. _api.compute: -.. currentmodule:: pyarrow +.. currentmodule:: pyarrow.compute Compute Functions ================= @@ -203,4 +203,4 @@ Structural Transforms is_valid list_value_length list_flatten - list_parent_indices \ No newline at end of file + list_parent_indices diff --git a/docs/source/python/compute.rst b/docs/source/python/compute.rst index 77e8da306c8..51126d97c82 100644 --- a/docs/source/python/compute.rst +++ b/docs/source/python/compute.rst @@ -52,4 +52,4 @@ Below are a few simple examples: .. seealso:: - :ref:`C++ compute functions documentation `. + :ref:`Available compute functions (C++ documentation) <compute-function-list>`. diff --git a/docs/source/python/index.rst b/docs/source/python/index.rst index d4daf4029ac..cc7383044e0 100644 --- a/docs/source/python/index.rst +++ b/docs/source/python/index.rst @@ -36,6 +36,7 @@ files into Arrow structures. 
install memory data + compute ipc filesystems filesystems_deprecated diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index ed50ccc3dca..323b9c43f68 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -594,6 +594,7 @@ cdef class TakeOptions(FunctionOptions): cdef const CFunctionOptions* get_options(self) except NULL: return &self.take_options + cdef class PartitionNthOptions(FunctionOptions): cdef: unique_ptr[CPartitionNthOptions] partition_nth_options @@ -604,6 +605,7 @@ cdef class PartitionNthOptions(FunctionOptions): cdef const CFunctionOptions* get_options(self) except NULL: return self.partition_nth_options.get() + cdef class MinMaxOptions(FunctionOptions): cdef: CMinMaxOptions min_max_options @@ -621,6 +623,7 @@ cdef class MinMaxOptions(FunctionOptions): cdef const CFunctionOptions* get_options(self) except NULL: return &self.min_max_options + cdef class SetLookupOptions(FunctionOptions): cdef: unique_ptr[CSetLookupOptions] set_lookup_options @@ -670,6 +673,7 @@ cdef class StrptimeOptions(FunctionOptions): cdef const CFunctionOptions* get_options(self) except NULL: return self.strptime_options.get() + cdef class VarianceOptions(FunctionOptions): cdef: CVarianceOptions variance_options diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index c60188b513c..df6d21505d1 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. 
-import sys - from pyarrow._compute import ( # noqa Function, FunctionRegistry, @@ -429,5 +427,5 @@ def fill_null(values, fill_value): return call_function("fill_null", [values, fill_value]) -and_ = getattr(sys.modules[__name__], 'and') -or_ = getattr(sys.modules[__name__], 'or') +and_ = globals()['and'] +or_ = globals()['or'] diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 89b43dcaeb3..dc1b2856287 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -19,7 +19,6 @@ from functools import lru_cache import pickle import pytest -import sys import textwrap import numpy as np @@ -225,6 +224,13 @@ def test_mode_chunked_array(): assert pc.mode(arr).as_py() == expected +def test_variance(): + data = [1, 2, 3, 4, 5, 6, 7, 8] + assert pc.variance(data).as_py() == 5.25 + assert pc.variance(data, ddof=0).as_py() == 5.25 + assert pc.variance(data, ddof=1).as_py() == 6.0 + + def test_match_substring(): arr = pa.array(["ab", "abc", "ba", None]) result = pc.match_substring(arr, "ab") @@ -827,7 +833,6 @@ def test_fill_null_chunked_array(arrow_type): def test_logical(): - a = pa.array([True, False, False, None]) b = pa.array([True, True, False, True]) @@ -835,7 +840,7 @@ def test_logical(): assert pc.and_kleene(a, b) == pa.array([True, False, False, None]) assert pc.or_(a, b) == pa.array([True, True, False, None]) - assert pc.and_kleene(a, b) == pa.array([True, False, False, None]) + assert pc.or_kleene(a, b) == pa.array([True, True, False, True]) assert pc.xor(a, b) == pa.array([False, True, False, None]) @@ -843,8 +848,7 @@ def test_logical(): def test_cast(): - - arr = pa.array([sys.maxsize], type='int64') + arr = pa.array([2**63 - 1], type='int64') with pytest.raises(pa.ArrowInvalid): pc.cast(arr, 'int32')