From 74ba00783ce2e16812c05b7ceefdd6af1a830c53 Mon Sep 17 00:00:00 2001 From: David Li Date: Fri, 21 May 2021 10:41:17 -0400 Subject: [PATCH 1/3] ARROW-12843: [C++][R] Implement is_inf kernel --- .../arrow/compute/kernels/scalar_validity.cc | 35 ++++++++--- .../compute/kernels/scalar_validity_test.cc | 60 +++++++++++++++++-- docs/source/cpp/compute.rst | 24 ++++---- docs/source/python/api/compute.rst | 2 + r/R/arrow-datum.R | 6 ++ r/R/expression.R | 1 + r/tests/testthat/test-chunked-array.R | 16 +++++ 7 files changed, 123 insertions(+), 21 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc index ebb3dca0d1e..6d19b733ce8 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc @@ -60,6 +60,13 @@ struct IsValidOperator { } }; +struct IsInfOperator { + template + static constexpr OutType Call(KernelContext*, const InType& value, Status*) { + return std::isinf(value); + } +}; + struct IsNullOperator { static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { checked_cast(out)->value = !in.is_valid; @@ -103,19 +110,28 @@ void MakeFunction(std::string name, const FunctionDoc* doc, DCHECK_OK(registry->AddFunction(std::move(func))); } -template -void AddIsNanKernel(const std::shared_ptr& ty, ScalarFunction* func) { - DCHECK_OK( - func->AddKernel({ty}, boolean(), - applicator::ScalarUnary::Exec)); +template +void AddFloatValidityKernel(const std::shared_ptr& ty, ScalarFunction* func) { + DCHECK_OK(func->AddKernel({ty}, boolean(), + applicator::ScalarUnary::Exec)); +} + +std::shared_ptr MakeIsInfFunction(std::string name, + const FunctionDoc* doc) { + auto func = std::make_shared(name, Arity::Unary(), doc); + + AddFloatValidityKernel(float32(), func.get()); + AddFloatValidityKernel(float64(), func.get()); + + return func; } std::shared_ptr MakeIsNanFunction(std::string name, const FunctionDoc* doc) { auto func = std::make_shared(name, Arity::Unary(), doc); - AddIsNanKernel(float32(), func.get()); - AddIsNanKernel(float64(), func.get()); + AddFloatValidityKernel(float32(), func.get()); + AddFloatValidityKernel(float64(), func.get()); return func; } @@ -159,6 +175,10 @@ const FunctionDoc is_valid_doc( "Return true if non-null", ("For each input value, emit true iff the value is valid (non-null)."), {"values"}); +const FunctionDoc is_inf_doc( + "Return true if infinity", + ("For each input value, emit true iff the value is inf or -inf."), {"values"}); + const FunctionDoc is_null_doc("Return true if null", ("For each input value, emit true iff the value is null."), {"values"}); @@ -177,6 +197,7 @@ void RegisterScalarValidity(FunctionRegistry* registry) { registry, MemAllocation::PREALLOCATE, /*can_write_into_slices=*/true); + DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc))); DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc))); } diff --git a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc index 54fa5967f7a..2d2bb197106 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc @@ -88,15 +88,59 @@ TEST_F(TestBooleanValidityKernels, ScalarIsNull) { CheckScalarUnary("is_null", MakeNullScalar(float64()), MakeScalar(true)); } +TEST_F(TestFloatValidityKernels, FloatArrayIsInf) { + // All Inf + CheckScalarUnary("is_inf", ArrayFromJSON(float32(), "[Inf, -Inf, Inf, -Inf, Inf]"), + ArrayFromJSON(boolean(), "[true, true, true, true, true]")); + // No Inf + CheckScalarUnary("is_inf", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"), + ArrayFromJSON(boolean(), "[false, false, false, false, false, null]")); + // Some Infs + CheckScalarUnary("is_inf", ArrayFromJSON(float32(), "[0.0, Inf, 2.0, -Inf, NaN, null]"), + ArrayFromJSON(boolean(), "[false, true, false, true, false, null]")); +} + +TEST_F(TestDoubleValidityKernels, DoubleArrayIsInf) { + // All Inf + CheckScalarUnary("is_inf", ArrayFromJSON(float64(), "[Inf, -Inf, Inf, -Inf, Inf]"), + ArrayFromJSON(boolean(), "[true, true, true, true, true]")); + // No Inf + CheckScalarUnary("is_inf", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"), + ArrayFromJSON(boolean(), "[false, false, false, false, false, null]")); + // Some Infs + CheckScalarUnary("is_inf", ArrayFromJSON(float64(), "[0.0, Inf, 2.0, -Inf, NaN, null]"), + ArrayFromJSON(boolean(), "[false, true, false, true, false, null]")); +} + +TEST_F(TestFloatValidityKernels, FloatScalarIsInf) { + CheckScalarUnary("is_inf", MakeNullScalar(float32()), MakeNullScalar(boolean())); + CheckScalarUnary("is_inf", MakeScalar(42.0f), MakeScalar(false)); + CheckScalarUnary("is_inf", MakeScalar(std::nanf("")), MakeScalar(false)); + CheckScalarUnary("is_inf", MakeScalar(std::numeric_limits::infinity()), + MakeScalar(true)); + CheckScalarUnary("is_inf", MakeScalar(-std::numeric_limits::infinity()), + MakeScalar(true)); +} + +TEST_F(TestDoubleValidityKernels, DoubleScalarIsInf) { + CheckScalarUnary("is_inf", MakeNullScalar(float64()), MakeNullScalar(boolean())); + CheckScalarUnary("is_inf", MakeScalar(42.0), MakeScalar(false)); + CheckScalarUnary("is_inf", MakeScalar(std::nan("")), MakeScalar(false)); + CheckScalarUnary("is_inf", MakeScalar(std::numeric_limits::infinity()), + MakeScalar(true)); + CheckScalarUnary("is_inf", MakeScalar(-std::numeric_limits::infinity()), + MakeScalar(true)); +} + TEST_F(TestFloatValidityKernels, FloatArrayIsNan) { // All NaN CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[NaN, NaN, NaN, NaN, NaN]"), ArrayFromJSON(boolean(), "[true, true, true, true, true]")); // No NaN - CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, 4.0, null]"), + CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, Inf, null]"), ArrayFromJSON(boolean(), "[false, false, false, false, false, null]")); // Some NaNs - CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, NaN, 2.0, NaN, 4.0, null]"), + CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, NaN, 2.0, NaN, Inf, null]"), ArrayFromJSON(boolean(), "[false, true, false, true, false, null]")); } @@ -105,10 +149,10 @@ TEST_F(TestDoubleValidityKernels, DoubleArrayIsNan) { CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[NaN, NaN, NaN, NaN, NaN]"), ArrayFromJSON(boolean(), "[true, true, true, true, true]")); // No NaN - CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, 4.0, null]"), + CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, Inf, null]"), ArrayFromJSON(boolean(), "[false, false, false, false, false, null]")); // Some NaNs - CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, NaN, 2.0, NaN, 4.0, null]"), + CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, NaN, 2.0, NaN, Inf, null]"), ArrayFromJSON(boolean(), "[false, true, false, true, false, null]")); } @@ -116,12 +160,20 @@ TEST_F(TestFloatValidityKernels, FloatScalarIsNan) { CheckScalarUnary("is_nan", MakeNullScalar(float32()), MakeNullScalar(boolean())); CheckScalarUnary("is_nan", MakeScalar(42.0f), MakeScalar(false)); CheckScalarUnary("is_nan", MakeScalar(std::nanf("")), MakeScalar(true)); + CheckScalarUnary("is_nan", MakeScalar(std::numeric_limits::infinity()), + MakeScalar(false)); + CheckScalarUnary("is_nan", MakeScalar(-std::numeric_limits::infinity()), + MakeScalar(false)); } TEST_F(TestDoubleValidityKernels, DoubleScalarIsNan) { CheckScalarUnary("is_nan", MakeNullScalar(float64()), MakeNullScalar(boolean())); CheckScalarUnary("is_nan", MakeScalar(42.0), MakeScalar(false)); CheckScalarUnary("is_nan", MakeScalar(std::nan("")), MakeScalar(true)); + CheckScalarUnary("is_nan", MakeScalar(std::numeric_limits::infinity()), + MakeScalar(false)); + CheckScalarUnary("is_nan", MakeScalar(-std::numeric_limits::infinity()), + MakeScalar(false)); } } // namespace compute diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 79140257a9b..7297a1c37b0 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -638,31 +638,35 @@ Structural transforms +==========================+============+================================================+=====================+=========+ | fill_null | Binary | Boolean, Null, Numeric, Temporal, String-like | Input type | \(1) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| is_nan | Unary | Float, Double | Boolean | \(2) | +| is_inf | Unary | Float, Double | Boolean | \(2) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| is_null | Unary | Any | Boolean | \(3) | +| is_nan | Unary | Float, Double | Boolean | \(3) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| is_valid | Unary | Any | Boolean | \(4) | +| is_null | Unary | Any | Boolean | \(4) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| list_value_length | Unary | List-like | Int32 or Int64 | \(5) | +| is_valid | Unary | Any | Boolean | \(5) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| project | Varargs | Any | Struct | \(6) | +| list_value_length | Unary | List-like | Int32 or Int64 | \(6) | ++--------------------------+------------+------------------------------------------------+---------------------+---------+ +| project | Varargs | Any | Struct | \(7) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ * \(1) First input must be an array, second input a scalar of the same type. Output is an array of the same type as the inputs, and with the same values as the first input, except for nulls replaced with the second input value. -* \(2) Output is true iff the corresponding input element is NaN. +* \(2) Output is true iff the corresponding input element is Infinity/-Infinity. + +* \(3) Output is true iff the corresponding input element is NaN. -* \(3) Output is true iff the corresponding input element is null. +* \(4) Output is true iff the corresponding input element is null. -* \(4) Output is true iff the corresponding input element is non-null. +* \(5) Output is true iff the corresponding input element is non-null. -* \(5) Each output element is the length of the corresponding input element +* \(6) Each output element is the length of the corresponding input element (null if input is null). Output type is Int32 for List, Int64 for LargeList. -* \(6) The output struct's field types are the types of its arguments. The +* \(7) The output struct's field types are the types of its arguments. The field names are specified using an instance of :struct:`ProjectOptions`. The output shape will be scalar if all inputs are scalar, otherwise any scalars will be broadcast to arrays. diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index d206cbc9e50..8931871ad82 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -220,6 +220,8 @@ Structural Transforms binary_length fill_null + is_inf + is_nan is_null is_valid list_value_length diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index f7c1d4d4ed7..3058fc28142 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -32,6 +32,12 @@ ArrowDatum <- R6Class("ArrowDatum", inherit = ArrowObject, #' @export length.ArrowDatum <- function(x) x$length() +#' @export +is.finite.ArrowDatum <- function(x) !call_function("is_inf", x) + +#' @export +is.infinite.ArrowDatum <- function(x) call_function("is_inf", x) + #' @export is.na.ArrowDatum <- function(x) call_function("is_null", x) diff --git a/r/R/expression.R b/r/R/expression.R index 3b24b09bb8b..90a184f3ce4 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -20,6 +20,7 @@ .unary_function_map <- list( "!" = "invert", "as.factor" = "dictionary_encode", + "is.infinite" = "is_inf", "is.na" = "is_null", "is.nan" = "is_nan", # nchar is defined in dplyr.R because it is more complex diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index f5b2dca2e44..f9b102c6819 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -108,6 +108,22 @@ test_that("ChunkedArray handles !!! splicing", { expect_equal(x$num_chunks, 3L) }) +test_that("ChunkedArray handles Inf", { + data <- list(c(Inf, 2:10), c(1:3, Inf, 5L), 1:10) + x <- chunked_array(!!!data) + expect_equal(x$type, float64()) + expect_equal(x$num_chunks, 3L) + expect_equal(length(x), 25L) + expect_equal(as.vector(x), c(c(Inf, 2:10), c(1:3, Inf, 5), 1:10)) + + chunks <- x$chunks + expect_equal(as.vector(is.infinite(chunks[[2]])), is.infinite(data[[2]])) + expect_equal( + as.vector(is.infinite(x)), + c(is.infinite(data[[1]]), is.infinite(data[[2]]), is.infinite(data[[3]])) + ) +}) + test_that("ChunkedArray handles NA", { data <- list(1:10, c(NA, 2:10), c(1:3, NA, 5L)) x <- chunked_array(!!!data) From d6d8073a2ad9252f6f40df1344e16825bd24cd61 Mon Sep 17 00:00:00 2001 From: David Li Date: Fri, 21 May 2021 17:19:40 -0400 Subject: [PATCH 2/3] ARROW-12843: [C++][R] Implement is_finite kernel --- .../arrow/compute/kernels/scalar_validity.cc | 26 +++++++++- .../compute/kernels/scalar_validity_test.cc | 48 +++++++++++++++++++ docs/source/cpp/compute.rst | 29 ++++++----- docs/source/python/api/compute.rst | 1 + r/R/arrow-datum.R | 2 +- 5 files changed, 92 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc index 6d19b733ce8..ead88abc0f2 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity.cc @@ -60,6 +60,13 @@ struct IsValidOperator { } }; +struct IsFiniteOperator { + template + static constexpr OutType Call(KernelContext*, const InType& value, Status*) { + return std::isfinite(value); + } +}; + struct IsInfOperator { template static constexpr OutType Call(KernelContext*, const InType& value, Status*) { @@ -116,6 +123,16 @@ void AddFloatValidityKernel(const std::shared_ptr& ty, ScalarFunction* applicator::ScalarUnary::Exec)); } +std::shared_ptr MakeIsFiniteFunction(std::string name, + const FunctionDoc* doc) { + auto func = std::make_shared(name, Arity::Unary(), doc); + + AddFloatValidityKernel(float32(), func.get()); + AddFloatValidityKernel(float64(), func.get()); + + return func; +} + std::shared_ptr MakeIsInfFunction(std::string name, const FunctionDoc* doc) { auto func = std::make_shared(name, Arity::Unary(), doc); @@ -175,9 +192,15 @@ const FunctionDoc is_valid_doc( "Return true if non-null", ("For each input value, emit true iff the value is valid (non-null)."), {"values"}); +const FunctionDoc is_finite_doc( + "Return true if value is finite", + ("For each input value, emit true iff the value is finite (not NaN, inf, or -inf)."), + {"values"}); + const FunctionDoc is_inf_doc( "Return true if infinity", - ("For each input value, emit true iff the value is inf or -inf."), {"values"}); + ("For each input value, emit true iff the value is infinite (inf or -inf)."), + {"values"}); const FunctionDoc is_null_doc("Return true if null", ("For each input value, emit true iff the value is null."), @@ -197,6 +220,7 @@ void RegisterScalarValidity(FunctionRegistry* registry) { registry, MemAllocation::PREALLOCATE, /*can_write_into_slices=*/true); + DCHECK_OK(registry->AddFunction(MakeIsFiniteFunction("is_finite", &is_finite_doc))); DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc))); DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc))); } diff --git a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc index 2d2bb197106..1a7a1cbda15 100644 --- a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc @@ -88,6 +88,54 @@ TEST_F(TestBooleanValidityKernels, ScalarIsNull) { CheckScalarUnary("is_null", MakeNullScalar(float64()), MakeScalar(true)); } +TEST_F(TestFloatValidityKernels, FloatArrayIsFinite) { + // All Inf + CheckScalarUnary("is_finite", ArrayFromJSON(float32(), "[Inf, -Inf, Inf, -Inf, Inf]"), + ArrayFromJSON(boolean(), "[false, false, false, false, false]")); + // No Inf + CheckScalarUnary("is_finite", + ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"), + ArrayFromJSON(boolean(), "[true, true, true, true, false, null]")); + // Some Inf + CheckScalarUnary("is_finite", + ArrayFromJSON(float32(), "[0.0, Inf, 2.0, -Inf, NaN, null]"), + ArrayFromJSON(boolean(), "[true, false, true, false, false, null]")); +} + +TEST_F(TestDoubleValidityKernels, DoubleArrayIsFinite) { + // All Inf + CheckScalarUnary("is_finite", ArrayFromJSON(float64(), "[Inf, -Inf, Inf, -Inf, Inf]"), + ArrayFromJSON(boolean(), "[false, false, false, false, false]")); + // No Inf + CheckScalarUnary("is_finite", + ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"), + ArrayFromJSON(boolean(), "[true, true, true, true, false, null]")); + // Some Inf + CheckScalarUnary("is_finite", + ArrayFromJSON(float64(), "[0.0, Inf, 2.0, -Inf, NaN, null]"), + ArrayFromJSON(boolean(), "[true, false, true, false, false, null]")); +} + +TEST_F(TestFloatValidityKernels, FloatScalarIsFinite) { + CheckScalarUnary("is_finite", MakeNullScalar(float32()), MakeNullScalar(boolean())); + CheckScalarUnary("is_finite", MakeScalar(42.0f), MakeScalar(true)); + CheckScalarUnary("is_finite", MakeScalar(std::nanf("")), MakeScalar(false)); + CheckScalarUnary("is_finite", MakeScalar(std::numeric_limits::infinity()), + MakeScalar(false)); + CheckScalarUnary("is_finite", MakeScalar(-std::numeric_limits::infinity()), + MakeScalar(false)); +} + +TEST_F(TestDoubleValidityKernels, DoubleScalarIsFinite) { + CheckScalarUnary("is_finite", MakeNullScalar(float64()), MakeNullScalar(boolean())); + CheckScalarUnary("is_finite", MakeScalar(42.0), MakeScalar(true)); + CheckScalarUnary("is_finite", MakeScalar(std::nan("")), MakeScalar(false)); + CheckScalarUnary("is_finite", MakeScalar(std::numeric_limits::infinity()), + MakeScalar(false)); + CheckScalarUnary("is_finite", MakeScalar(-std::numeric_limits::infinity()), + MakeScalar(false)); +} + TEST_F(TestFloatValidityKernels, FloatArrayIsInf) { // All Inf CheckScalarUnary("is_inf", ArrayFromJSON(float32(), "[Inf, -Inf, Inf, -Inf, Inf]"), diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 7297a1c37b0..79e735af839 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -638,35 +638,40 @@ Structural transforms +==========================+============+================================================+=====================+=========+ | fill_null | Binary | Boolean, Null, Numeric, Temporal, String-like | Input type | \(1) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| is_inf | Unary | Float, Double | Boolean | \(2) | +| is_finite | Unary | Float, Double | Boolean | \(2) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| is_nan | Unary | Float, Double | Boolean | \(3) | +| is_inf | Unary | Float, Double | Boolean | \(3) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| is_null | Unary | Any | Boolean | \(4) | +| is_nan | Unary | Float, Double | Boolean | \(4) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| is_valid | Unary | Any | Boolean | \(5) | +| is_null | Unary | Any | Boolean | \(5) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| list_value_length | Unary | List-like | Int32 or Int64 | \(6) | +| is_valid | Unary | Any | Boolean | \(6) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ -| project | Varargs | Any | Struct | \(7) | +| list_value_length | Unary | List-like | Int32 or Int64 | \(7) | ++--------------------------+------------+------------------------------------------------+---------------------+---------+ +| project | Varargs | Any | Struct | \(8) | +--------------------------+------------+------------------------------------------------+---------------------+---------+ * \(1) First input must be an array, second input a scalar of the same type. Output is an array of the same type as the inputs, and with the same values as the first input, except for nulls replaced with the second input value. -* \(2) Output is true iff the corresponding input element is Infinity/-Infinity. +* \(2) Output is true iff the corresponding input element is finite (not Infinity, + -Infinity, or NaN). + +* \(3) Output is true iff the corresponding input element is Infinity/-Infinity. -* \(3) Output is true iff the corresponding input element is NaN. +* \(4) Output is true iff the corresponding input element is NaN. -* \(4) Output is true iff the corresponding input element is null. +* \(5) Output is true iff the corresponding input element is null. -* \(5) Output is true iff the corresponding input element is non-null. +* \(6) Output is true iff the corresponding input element is non-null. -* \(6) Each output element is the length of the corresponding input element +* \(7) Each output element is the length of the corresponding input element (null if input is null). Output type is Int32 for List, Int64 for LargeList. -* \(7) The output struct's field types are the types of its arguments. The +* \(8) The output struct's field types are the types of its arguments. The field names are specified using an instance of :struct:`ProjectOptions`. The output shape will be scalar if all inputs are scalar, otherwise any scalars will be broadcast to arrays. diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 8931871ad82..52b1b0d3092 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -220,6 +220,7 @@ Structural Transforms binary_length fill_null + is_finite is_inf is_nan is_null diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 3058fc28142..689b2f72b20 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -33,7 +33,7 @@ ArrowDatum <- R6Class("ArrowDatum", inherit = ArrowObject, length.ArrowDatum <- function(x) x$length() #' @export -is.finite.ArrowDatum <- function(x) !call_function("is_inf", x) +is.finite.ArrowDatum <- function(x) call_function("is_finite", x) #' @export is.infinite.ArrowDatum <- function(x) call_function("is_inf", x) From 4ad4eb5a96f945bc54888ba7c5466a5e1a7e9f15 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 21 May 2021 18:22:18 -0400 Subject: [PATCH 3/3] Tweak R bindings --- r/NAMESPACE | 3 ++- r/R/arrow-datum.R | 12 ++++++++++-- r/R/dplyr-functions.R | 12 ++++++++++++ r/R/expression.R | 4 +--- r/R/scalar.R | 3 --- r/tests/testthat/test-dplyr.R | 21 +++++++++++++++++++++ r/tests/testthat/test-scalar.R | 2 +- 7 files changed, 47 insertions(+), 10 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index f89d2effea7..b703878d3ee 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -46,9 +46,10 @@ S3method(head,ArrowDatum) S3method(head,ArrowTabular) S3method(head,Dataset) S3method(head,arrow_dplyr_query) +S3method(is.finite,ArrowDatum) +S3method(is.infinite,ArrowDatum) S3method(is.na,ArrowDatum) S3method(is.na,Expression) -S3method(is.na,Scalar) S3method(is.nan,ArrowDatum) S3method(is_in,ArrowDatum) S3method(is_in,default) diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R index 689b2f72b20..3be8d75af0b 100644 --- a/r/R/arrow-datum.R +++ b/r/R/arrow-datum.R @@ -33,10 +33,18 @@ ArrowDatum <- R6Class("ArrowDatum", inherit = ArrowObject, length.ArrowDatum <- function(x) x$length() #' @export -is.finite.ArrowDatum <- function(x) call_function("is_finite", x) +is.finite.ArrowDatum <- function(x) { + is_fin <- call_function("is_finite", x) + # for compatibility with base::is.finite(), return FALSE for NA_real_ + is_fin & !is.na(is_fin) +} #' @export -is.infinite.ArrowDatum <- function(x) call_function("is_inf", x) +is.infinite.ArrowDatum <- function(x) { + is_inf <- call_function("is_inf", x) + # for compatibility with base::is.infinite(), return FALSE for NA_real_ + is_inf & !is.na(is_inf) +} #' @export is.na.ArrowDatum <- function(x) call_function("is_null", x) diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R index e3ff5cecebd..7e0eadfdcea 100644 --- a/r/R/dplyr-functions.R +++ b/r/R/dplyr-functions.R @@ -72,6 +72,18 @@ nse_funcs$between <- function(x, left, right) { x >= left & x <= right } +nse_funcs$is.finite <- function(x) { + is_fin <- Expression$create("is_finite", x) + # for compatibility with base::is.finite(), return FALSE for NA_real_ + is_fin & !nse_funcs$is.na(is_fin) +} + +nse_funcs$is.infinite <- function(x) { + is_inf <- Expression$create("is_inf", x) + # for compatibility with base::is.infinite(), return FALSE for NA_real_ + is_inf & !nse_funcs$is.na(is_inf) +} + # as.* type casting functions # as.factor() is mapped in expression.R nse_funcs$as.character <- function(x) { diff --git a/r/R/expression.R b/r/R/expression.R index 90a184f3ce4..99d98b6af0a 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -20,11 +20,9 @@ .unary_function_map <- list( "!" = "invert", "as.factor" = "dictionary_encode", - "is.infinite" = "is_inf", "is.na" = "is_null", "is.nan" = "is_nan", - # nchar is defined in dplyr.R because it is more complex - # "nchar" = "utf8_length", + # nchar is defined in dplyr-functions.R "tolower" = "utf8_lower", "toupper" = "utf8_upper", # stringr spellings of those diff --git a/r/R/scalar.R b/r/R/scalar.R index cbda5964a2c..54fe37f081e 100644 --- a/r/R/scalar.R +++ b/r/R/scalar.R @@ -72,8 +72,5 @@ StructScalar <- R6Class("StructScalar", #' @export length.Scalar <- function(x) 1L -#' @export -is.na.Scalar <- function(x) !x$is_valid - #' @export sort.Scalar <- function(x, decreasing = FALSE, ...) x diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R index 46d30e37823..bf5f06b038c 100644 --- a/r/tests/testthat/test-dplyr.R +++ b/r/tests/testthat/test-dplyr.R @@ -503,6 +503,27 @@ test_that("explicit type conversions with as.*()", { ) }) +test_that("is.finite(), is.infinite(), is.nan()", { + df <- tibble(x =c(-4.94065645841246544e-324, 1.79769313486231570e+308, 0, + NA_real_, NaN, Inf, -Inf)) + expect_dplyr_equal( + input %>% + transmute( + is_fin = is.finite(x), + is_inf = is.infinite(x) + ) %>% collect(), + df + ) + skip("is.nan() evaluates to NA on NA values (ARROW-12850)") + expect_dplyr_equal( + input %>% + transmute( + is_nan = is.nan(x) + ) %>% collect(), + df + ) +}) + test_that("as.factor()/dictionary_encode()", { skip("ARROW-12632: ExecuteScalarExpression cannot Execute non-scalar expression {x=dictionary_encode(x, {NON-REPRESENTABLE OPTIONS})}") df1 <- tibble(x = c("C", "D", "B", NA, "D", "B", "S", "A", "B", "Z", "B")) diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R index 21b2836496e..d0b13423463 100644 --- a/r/tests/testthat/test-scalar.R +++ b/r/tests/testthat/test-scalar.R @@ -26,7 +26,7 @@ expect_scalar_roundtrip <- function(x, type) { # Should this be? Missing if all elements are missing? # expect_identical(is.na(s), all(is.na(x))) } else { - expect_identical(is.na(s), is.na(x)) + expect_identical(as.vector(is.na(s)), is.na(x)) # MakeArrayFromScalar not implemented for list types expect_equal(as.vector(s), x) }