From 36db249361c8c9364138b4bd4bb7e68360269edb Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 14 Feb 2023 23:27:24 +0100 Subject: [PATCH 1/7] GH-34154: [Python] Add `is_nan` expression Closes #34154 --- python/pyarrow/_compute.pyx | 13 +++++++++++++ python/pyarrow/array.pxi | 6 ++++++ python/pyarrow/tests/test_compute.py | 4 ++++ 3 files changed, 23 insertions(+) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 2f6d67c9911..4bcb3548c65 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -2436,6 +2436,19 @@ cdef class Expression(_Weakrefable): options = NullOptions(nan_is_null=nan_is_null) return Expression._call("is_null", [self], options) + def is_nan(self): + """ + Check whether the expression is nan. + + This creates a new expression equivalent to calling the + `is_nan` compute function on this expression. + + Returns + ------- + is_nan : Expression + """ + return Expression._call("is_nan", [self]) + def cast(self, type=None, safe=None, options=None): """ Explicitly set or change the expression's data type. diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 897950d218a..f314dd8aa56 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1253,6 +1253,12 @@ cdef class Array(_PandasConvertible): options = _pc().NullOptions(nan_is_null=nan_is_null) return _pc().call_function('is_null', [self], options) + def is_nan(self): + """ + Return BooleanArray indicating the nan values. + """ + return _pc().call_function('is_nan', [self]) + def is_valid(self): """ Return BooleanArray indicating the non-null values. 
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index ed79b47104d..677d3b30a1d 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1634,6 +1634,10 @@ def test_is_null(): expected = pa.array([False, False, False, True, True]) assert result.equals(expected) + result = arr.is_nan() + expected = pa.array([False, False, False, False, True]) + assert result.equals(expected) + def test_fill_null(): arr = pa.array([1, 2, None, 4], type=pa.int8()) From ba349b5d0e09ce8ca7af60db1635ed3cebc549e0 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Thu, 23 Feb 2023 08:55:54 +0100 Subject: [PATCH 2/7] Update test and docstring --- python/pyarrow/array.pxi | 4 ++++ python/pyarrow/tests/test_compute.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index f314dd8aa56..fff930bcb52 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1256,6 +1256,10 @@ cdef class Array(_PandasConvertible): def is_nan(self): """ Return BooleanArray indicating the nan values. 
+ + Returns + ------- + array : boolean Array, where null values are None """ return _pc().call_function('is_nan', [self]) diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 677d3b30a1d..234cf1ea4c4 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1635,7 +1635,7 @@ def test_is_null(): assert result.equals(expected) result = arr.is_nan() - expected = pa.array([False, False, False, False, True]) + expected = pa.array([False, False, False, None, True]) assert result.equals(expected) From 2793dd524d04402072fb4fe99050d71cee31d637 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Thu, 23 Feb 2023 13:34:07 +0100 Subject: [PATCH 3/7] Add tests --- python/pyarrow/table.pxi | 26 ++++++++++++++++++++++++++ python/pyarrow/tests/test_compute.py | 14 ++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 8b950c14b13..67ab544e994 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -327,6 +327,32 @@ cdef class ChunkedArray(_PandasConvertible): options = _pc().NullOptions(nan_is_null=nan_is_null) return _pc().call_function('is_null', [self], options) + def is_nan(self): + """ + Return boolean array indicating the nan values. + + Examples + -------- + >>> import pyarrow as pa + >>> import numpy as np + >>> n_legs = pa.chunked_array([[2, np.nan, 4], [4, None, 100]]) + >>> n_legs.is_nan() + <pyarrow.lib.ChunkedArray object at ...> + [ + [ + false, + true, + false + ], + [ + false, + None, + false + ] + ] + """ + return _pc().is_valid(self) + def is_valid(self): """ Return boolean array indicating the non-null values. 
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 9d0b3df8a02..dbf64c674ac 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -35,6 +35,7 @@ import pyarrow as pa import pyarrow.compute as pc +from pyarrow.lib import ArrowNotImplementedError all_array_types = [ ('bool', [True, False, False, True, True]), @@ -1634,10 +1635,23 @@ def test_is_null(): expected = pa.array([False, False, False, True, True]) assert result.equals(expected) + +def test_is_nan(): + arr = pa.array([1, 2, 3, None, np.nan]) result = arr.is_nan() expected = pa.array([False, False, False, None, True]) assert result.equals(expected) + with pytest.raises( + ArrowNotImplementedError, match="has no kernel matching input types"): + arr = pa.array(["1", "2", None], type=pa.string()) + _ = arr.is_nan() + + with pytest.raises( + ArrowNotImplementedError, match="has no kernel matching input types"): + arr = pa.array([b'a', b'bb', None], type=pa.large_binary()) + _ = arr.is_nan() + def test_fill_null(): arr = pa.array([1, 2, None, 4], type=pa.int8()) From df625f2abf7f460b9935503998c075ad9f3c5e62 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Tue, 28 Feb 2023 09:19:22 +0100 Subject: [PATCH 4/7] Oops --- python/pyarrow/table.pxi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 67ab544e994..076432855ea 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -335,8 +335,8 @@ cdef class ChunkedArray(_PandasConvertible): -------- >>> import pyarrow as pa >>> import numpy as np - >>> n_legs = pa.chunked_array([[2, np.nan, 4], [4, None, 100]]) - >>> n_legs.is_nan() + >>> arr = pa.chunked_array([[2, np.nan, 4], [4, None, 100]]) + >>> arr.is_nan() <pyarrow.lib.ChunkedArray object at ...> [ [ @@ -351,7 +351,7 @@ cdef class ChunkedArray(_PandasConvertible): ] ] """ - return _pc().is_valid(self) + return _pc().is_nan(self) def is_valid(self): """ From 
c254cf5d588eed8a2c5263c8702d54f9536ce2ac Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Thu, 2 Mar 2023 10:08:40 +0100 Subject: [PATCH 5/7] Apply suggestions from code review Co-authored-by: Joris Van den Bossche --- python/pyarrow/_compute.pyx | 2 +- python/pyarrow/array.pxi | 2 +- python/pyarrow/table.pxi | 2 +- python/pyarrow/tests/test_compute.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index 32ec5e81e3a..4b84fd64855 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -2438,7 +2438,7 @@ cdef class Expression(_Weakrefable): def is_nan(self): """ - Check whether the expression is nan. + Check whether the expression is NaN. This creates a new expression equivalent to calling the `is_nan` compute function on this expression. diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 8351b1b08c0..47499a1a002 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1255,7 +1255,7 @@ cdef class Array(_PandasConvertible): def is_nan(self): """ - Return BooleanArray indicating the nan values. + Return BooleanArray indicating the NaN values. Returns ------- diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 076432855ea..cfacc47ac97 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -329,7 +329,7 @@ cdef class ChunkedArray(_PandasConvertible): def is_nan(self): """ - Return boolean array indicating the nan values. + Return boolean array indicating the NaN values. 
Examples -------- diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index dbf64c674ac..51e12e8c4e0 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1642,9 +1642,9 @@ def test_is_nan(): expected = pa.array([False, False, False, None, True]) assert result.equals(expected) + arr = pa.array(["1", "2", None], type=pa.string()) with pytest.raises( ArrowNotImplementedError, match="has no kernel matching input types"): - arr = pa.array(["1", "2", None], type=pa.string()) _ = arr.is_nan() with pytest.raises( From d4c13610eb338d8b56722c4b827e3fc60897c313 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Fri, 3 Mar 2023 13:09:33 +0100 Subject: [PATCH 6/7] Update array.pxi --- python/pyarrow/array.pxi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 47499a1a002..417675f5599 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -1259,7 +1259,7 @@ cdef class Array(_PandasConvertible): Returns ------- - array : boolean Array, where null values are None + array : boolean Array """ return _pc().call_function('is_nan', [self]) From 384e62d6fd6fd6437e294e5395f56c7b3a9901c0 Mon Sep 17 00:00:00 2001 From: Fokko Driesprong Date: Fri, 3 Mar 2023 22:00:19 +0100 Subject: [PATCH 7/7] Fix tests --- python/pyarrow/table.pxi | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 076432855ea..83dff8efa29 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -342,11 +342,9 @@ cdef class ChunkedArray(_PandasConvertible): [ false, true, - false - ], - [ false, - None, + false, + null, false ] ]