apache · lidavidm · May 21, 2021 · May 21, 2021 · May 21, 2021
diff --git a/cpp/src/arrow/compute/kernels/scalar_validity.cc b/cpp/src/arrow/compute/kernels/scalar_validity.cc
@@ -60,6 +60,20 @@ struct IsValidOperator {
   }
 };
 
+struct IsFiniteOperator {  // per-value predicate behind the "is_finite" kernels
+  template <typename OutType, typename InType>
+  static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
+    return std::isfinite(value);  // false for NaN and +/-Inf, true otherwise
+  }
+};
+
+struct IsInfOperator {  // per-value predicate behind the "is_inf" kernels
+  template <typename OutType, typename InType>
+  static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
+    return std::isinf(value);  // true only for +Inf and -Inf (NaN gives false)
+  }
+};
+
 struct IsNullOperator {
   static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
     checked_cast<BooleanScalar*>(out)->value = !in.is_valid;
@@ -103,19 +117,38 @@ void MakeFunction(std::string name, const FunctionDoc* doc,
   DCHECK_OK(registry->AddFunction(std::move(func)));
 }
 
-template <typename InType>
-void AddIsNanKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
-  DCHECK_OK(
-      func->AddKernel({ty}, boolean(),
-                      applicator::ScalarUnary<BooleanType, InType, IsNanOperator>::Exec));
+template <typename InType, typename Op>  // Op: e.g. IsFiniteOperator, IsNanOperator
+void AddFloatValidityKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
+  DCHECK_OK(func->AddKernel({ty}, boolean(),  // input type `ty`, boolean output
+                            applicator::ScalarUnary<BooleanType, InType, Op>::Exec));
+}
+
+std::shared_ptr<ScalarFunction> MakeIsFiniteFunction(std::string name,
+                                                     const FunctionDoc* doc) {
+  auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+  // Unary function whose only kernels are IsFiniteOperator on float32 and float64.
+  AddFloatValidityKernel<FloatType, IsFiniteOperator>(float32(), func.get());
+  AddFloatValidityKernel<DoubleType, IsFiniteOperator>(float64(), func.get());
+  // Not registered here: the new function is simply handed back to the caller.
+  return func;
+}
+
+std::shared_ptr<ScalarFunction> MakeIsInfFunction(std::string name,
+                                                  const FunctionDoc* doc) {
+  auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
+  // Unary function whose only kernels are IsInfOperator on float32 and float64.
+  AddFloatValidityKernel<FloatType, IsInfOperator>(float32(), func.get());
+  AddFloatValidityKernel<DoubleType, IsInfOperator>(float64(), func.get());
+  // Not registered here: the new function is simply handed back to the caller.
+  return func;
 }
 
 std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
                                                   const FunctionDoc* doc) {
   auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);
 
-  AddIsNanKernel<FloatType>(float32(), func.get());
-  AddIsNanKernel<DoubleType>(float64(), func.get());
+  AddFloatValidityKernel<FloatType, IsNanOperator>(float32(), func.get());
+  AddFloatValidityKernel<DoubleType, IsNanOperator>(float64(), func.get());
 
   return func;
 }
@@ -159,6 +192,16 @@ const FunctionDoc is_valid_doc(
     "Return true if non-null",
     ("For each input value, emit true iff the value is valid (non-null)."), {"values"});
 
+const FunctionDoc is_finite_doc(  // passed when the "is_finite" function is created
+    "Return true if value is finite",
+    ("For each input value, emit true iff the value is finite (not NaN, inf, or -inf)."),
+    {"values"});
+
+const FunctionDoc is_inf_doc(  // passed when the "is_inf" function is created
+    "Return true if infinity",
+    ("For each input value, emit true iff the value is infinite (inf or -inf)."),
+    {"values"});
+
 const FunctionDoc is_null_doc("Return true if null",
                               ("For each input value, emit true iff the value is null."),
                               {"values"});
@@ -177,6 +220,8 @@ void RegisterScalarValidity(FunctionRegistry* registry) {
                registry, MemAllocation::PREALLOCATE,
                /*can_write_into_slices=*/true);
 
+  DCHECK_OK(registry->AddFunction(MakeIsFiniteFunction("is_finite", &is_finite_doc)));
+  DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc)));
   DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc)));
 }
 

diff --git a/cpp/src/arrow/compute/kernels/scalar_validity_test.cc b/cpp/src/arrow/compute/kernels/scalar_validity_test.cc
@@ -88,15 +88,107 @@ TEST_F(TestBooleanValidityKernels, ScalarIsNull) {
   CheckScalarUnary("is_null", MakeNullScalar(float64()), MakeScalar(true));
 }
 
+TEST_F(TestFloatValidityKernels, FloatArrayIsFinite) {  // float32 array inputs
+  // All Inf: every element is +Inf or -Inf, so every output is false
+  CheckScalarUnary("is_finite", ArrayFromJSON(float32(), "[Inf, -Inf, Inf, -Inf, Inf]"),
+                   ArrayFromJSON(boolean(), "[false, false, false, false, false]"));
+  // No Inf: NaN is still reported as not finite; null input gives null output
+  CheckScalarUnary("is_finite",
+                   ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[true, true, true, true, false, null]"));
+  // Some Inf: finite values, both infinities, NaN and null in one array
+  CheckScalarUnary("is_finite",
+                   ArrayFromJSON(float32(), "[0.0, Inf, 2.0, -Inf, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[true, false, true, false, false, null]"));
+}
+
+TEST_F(TestDoubleValidityKernels, DoubleArrayIsFinite) {  // float64 array inputs
+  // All Inf: every element is +Inf or -Inf, so every output is false
+  CheckScalarUnary("is_finite", ArrayFromJSON(float64(), "[Inf, -Inf, Inf, -Inf, Inf]"),
+                   ArrayFromJSON(boolean(), "[false, false, false, false, false]"));
+  // No Inf: NaN is still reported as not finite; null input gives null output
+  CheckScalarUnary("is_finite",
+                   ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[true, true, true, true, false, null]"));
+  // Some Inf: finite values, both infinities, NaN and null in one array
+  CheckScalarUnary("is_finite",
+                   ArrayFromJSON(float64(), "[0.0, Inf, 2.0, -Inf, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[true, false, true, false, false, null]"));
+}
+
+TEST_F(TestFloatValidityKernels, FloatScalarIsFinite) {  // float32 scalar inputs
+  CheckScalarUnary("is_finite", MakeNullScalar(float32()), MakeNullScalar(boolean()));
+  CheckScalarUnary("is_finite", MakeScalar(42.0f), MakeScalar(true));  // finite
+  CheckScalarUnary("is_finite", MakeScalar(std::nanf("")), MakeScalar(false));  // NaN
+  CheckScalarUnary("is_finite", MakeScalar(std::numeric_limits<float>::infinity()),
+                   MakeScalar(false));  // +Inf
+  CheckScalarUnary("is_finite", MakeScalar(-std::numeric_limits<float>::infinity()),
+                   MakeScalar(false));  // -Inf
+}
+
+TEST_F(TestDoubleValidityKernels, DoubleScalarIsFinite) {  // float64 scalar inputs
+  CheckScalarUnary("is_finite", MakeNullScalar(float64()), MakeNullScalar(boolean()));
+  CheckScalarUnary("is_finite", MakeScalar(42.0), MakeScalar(true));  // finite
+  CheckScalarUnary("is_finite", MakeScalar(std::nan("")), MakeScalar(false));  // NaN
+  CheckScalarUnary("is_finite", MakeScalar(std::numeric_limits<double>::infinity()),
+                   MakeScalar(false));  // +Inf
+  CheckScalarUnary("is_finite", MakeScalar(-std::numeric_limits<double>::infinity()),
+                   MakeScalar(false));  // -Inf
+}
+
+TEST_F(TestFloatValidityKernels, FloatArrayIsInf) {  // float32 array inputs
+  // All Inf: both +Inf and -Inf count as infinite
+  CheckScalarUnary("is_inf", ArrayFromJSON(float32(), "[Inf, -Inf, Inf, -Inf, Inf]"),
+                   ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
+  // No Inf: NaN is not infinite; null input gives null output
+  CheckScalarUnary("is_inf", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
+  // Some Infs: finite values, both infinities, NaN and null in one array
+  CheckScalarUnary("is_inf", ArrayFromJSON(float32(), "[0.0, Inf, 2.0, -Inf, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
+}
+
+TEST_F(TestDoubleValidityKernels, DoubleArrayIsInf) {  // float64 array inputs
+  // All Inf: both +Inf and -Inf count as infinite
+  CheckScalarUnary("is_inf", ArrayFromJSON(float64(), "[Inf, -Inf, Inf, -Inf, Inf]"),
+                   ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
+  // No Inf: NaN is not infinite; null input gives null output
+  CheckScalarUnary("is_inf", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
+  // Some Infs: finite values, both infinities, NaN and null in one array
+  CheckScalarUnary("is_inf", ArrayFromJSON(float64(), "[0.0, Inf, 2.0, -Inf, NaN, null]"),
+                   ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
+}
+
+TEST_F(TestFloatValidityKernels, FloatScalarIsInf) {  // float32 scalar inputs
+  CheckScalarUnary("is_inf", MakeNullScalar(float32()), MakeNullScalar(boolean()));
+  CheckScalarUnary("is_inf", MakeScalar(42.0f), MakeScalar(false));  // finite
+  CheckScalarUnary("is_inf", MakeScalar(std::nanf("")), MakeScalar(false));  // NaN
+  CheckScalarUnary("is_inf", MakeScalar(std::numeric_limits<float>::infinity()),
+                   MakeScalar(true));  // +Inf
+  CheckScalarUnary("is_inf", MakeScalar(-std::numeric_limits<float>::infinity()),
+                   MakeScalar(true));  // -Inf
+}
+
+TEST_F(TestDoubleValidityKernels, DoubleScalarIsInf) {  // float64 scalar inputs
+  CheckScalarUnary("is_inf", MakeNullScalar(float64()), MakeNullScalar(boolean()));
+  CheckScalarUnary("is_inf", MakeScalar(42.0), MakeScalar(false));  // finite
+  CheckScalarUnary("is_inf", MakeScalar(std::nan("")), MakeScalar(false));  // NaN
+  CheckScalarUnary("is_inf", MakeScalar(std::numeric_limits<double>::infinity()),
+                   MakeScalar(true));  // +Inf
+  CheckScalarUnary("is_inf", MakeScalar(-std::numeric_limits<double>::infinity()),
+                   MakeScalar(true));  // -Inf
+}
+
 TEST_F(TestFloatValidityKernels, FloatArrayIsNan) {
   // All NaN
   CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[NaN, NaN, NaN, NaN, NaN]"),
                    ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
   // No NaN
-  CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, 4.0, null]"),
+  CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, Inf, null]"),
                    ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
   // Some NaNs
-  CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, NaN, 2.0, NaN, 4.0, null]"),
+  CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, NaN, 2.0, NaN, Inf, null]"),
                    ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
 }
 
@@ -105,23 +197,31 @@ TEST_F(TestDoubleValidityKernels, DoubleArrayIsNan) {
   CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[NaN, NaN, NaN, NaN, NaN]"),
                    ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
   // No NaN
-  CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, 4.0, null]"),
+  CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, Inf, null]"),
                    ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
   // Some NaNs
-  CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, NaN, 2.0, NaN, 4.0, null]"),
+  CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, NaN, 2.0, NaN, Inf, null]"),
                    ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
 }
 
 TEST_F(TestFloatValidityKernels, FloatScalarIsNan) {
   CheckScalarUnary("is_nan", MakeNullScalar(float32()), MakeNullScalar(boolean()));
   CheckScalarUnary("is_nan", MakeScalar(42.0f), MakeScalar(false));
   CheckScalarUnary("is_nan", MakeScalar(std::nanf("")), MakeScalar(true));
+  CheckScalarUnary("is_nan", MakeScalar(std::numeric_limits<float>::infinity()),
+                   MakeScalar(false));  // +Inf is not NaN
+  CheckScalarUnary("is_nan", MakeScalar(-std::numeric_limits<float>::infinity()),
+                   MakeScalar(false));  // -Inf is not NaN
 }
 
 TEST_F(TestDoubleValidityKernels, DoubleScalarIsNan) {
   CheckScalarUnary("is_nan", MakeNullScalar(float64()), MakeNullScalar(boolean()));
   CheckScalarUnary("is_nan", MakeScalar(42.0), MakeScalar(false));
   CheckScalarUnary("is_nan", MakeScalar(std::nan("")), MakeScalar(true));
+  CheckScalarUnary("is_nan", MakeScalar(std::numeric_limits<double>::infinity()),
+                   MakeScalar(false));  // +Inf is not NaN
+  CheckScalarUnary("is_nan", MakeScalar(-std::numeric_limits<double>::infinity()),
+                   MakeScalar(false));  // -Inf is not NaN
 }
 
 }  // namespace compute

diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
@@ -638,31 +638,40 @@ Structural transforms
 +==========================+============+================================================+=====================+=========+
 | fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like  | Input type          | \(1)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_nan                   | Unary      | Float, Double                                  | Boolean             | \(2)    |
+| is_finite                | Unary      | Float, Double                                  | Boolean             | \(2)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_null                  | Unary      | Any                                            | Boolean             | \(3)    |
+| is_inf                   | Unary      | Float, Double                                  | Boolean             | \(3)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_valid                 | Unary      | Any                                            | Boolean             | \(4)    |
+| is_nan                   | Unary      | Float, Double                                  | Boolean             | \(4)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| list_value_length        | Unary      | List-like                                      | Int32 or Int64      | \(5)    |
+| is_null                  | Unary      | Any                                            | Boolean             | \(5)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
-| project                  | Varargs    | Any                                            | Struct              | \(6)    |
+| is_valid                 | Unary      | Any                                            | Boolean             | \(6)    |
++--------------------------+------------+------------------------------------------------+---------------------+---------+
+| list_value_length        | Unary      | List-like                                      | Int32 or Int64      | \(7)    |
++--------------------------+------------+------------------------------------------------+---------------------+---------+
+| project                  | Varargs    | Any                                            | Struct              | \(8)    |
 +--------------------------+------------+------------------------------------------------+---------------------+---------+
 
 * \(1) First input must be an array, second input a scalar of the same type.
   Output is an array of the same type as the inputs, and with the same values
   as the first input, except for nulls replaced with the second input value.
 
-* \(2) Output is true iff the corresponding input element is NaN.
+* \(2) Output is true iff the corresponding input element is finite (not Infinity,
+  -Infinity, or NaN).
+
+* \(3) Output is true iff the corresponding input element is Infinity/-Infinity.
+
+* \(4) Output is true iff the corresponding input element is NaN.
 
-* \(3) Output is true iff the corresponding input element is null.
+* \(5) Output is true iff the corresponding input element is null.
 
-* \(4) Output is true iff the corresponding input element is non-null.
+* \(6) Output is true iff the corresponding input element is non-null.
 
-* \(5) Each output element is the length of the corresponding input element
+* \(7) Each output element is the length of the corresponding input element
   (null if input is null).  Output type is Int32 for List, Int64 for LargeList.
 
-* \(6) The output struct's field types are the types of its arguments. The
+* \(8) The output struct's field types are the types of its arguments. The
   field names are specified using an instance of :struct:`ProjectOptions`.
   The output shape will be scalar if all inputs are scalar, otherwise any
   scalars will be broadcast to arrays.

diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
@@ -220,6 +220,9 @@ Structural Transforms
 
    binary_length
    fill_null
+   is_finite
+   is_inf
+   is_nan
    is_null
    is_valid
    list_value_length

diff --git a/r/NAMESPACE b/r/NAMESPACE
@@ -46,9 +46,10 @@ S3method(head,ArrowDatum)
 S3method(head,ArrowTabular)
 S3method(head,Dataset)
 S3method(head,arrow_dplyr_query)
+S3method(is.finite,ArrowDatum)
+S3method(is.infinite,ArrowDatum)
 S3method(is.na,ArrowDatum)
 S3method(is.na,Expression)
-S3method(is.na,Scalar)
 S3method(is.nan,ArrowDatum)
 S3method(is_in,ArrowDatum)
 S3method(is_in,default)

diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R
@@ -32,6 +32,20 @@ ArrowDatum <- R6Class("ArrowDatum", inherit = ArrowObject,
 #' @export
 length.ArrowDatum <- function(x) x$length()
 
+#' @export
+is.finite.ArrowDatum <- function(x) {
+  is_fin <- call_function("is_finite", x)  # null wherever x is null
+  # for compatibility with base::is.finite(), which is FALSE (never NA) for NA_real_
+  is_fin & !is.na(is_fin)
+}
+
+#' @export
+is.infinite.ArrowDatum <- function(x) {
+  is_inf <- call_function("is_inf", x)  # null wherever x is null
+  # for compatibility with base::is.infinite(), which is FALSE (never NA) for NA_real_
+  is_inf & !is.na(is_inf)
+}
+
 #' @export
 is.na.ArrowDatum <- function(x) call_function("is_null", x)
 

diff --git a/r/R/dplyr-functions.R b/r/R/dplyr-functions.R
@@ -72,6 +72,18 @@ nse_funcs$between <- function(x, left, right) {
   x >= left & x <= right
 }
 
+nse_funcs$is.finite <- function(x) {
+  is_fin <- Expression$create("is_finite", x)  # Expression calling "is_finite" on x
+  # for compatibility with base::is.finite(), which is FALSE (never NA) for NA_real_
+  is_fin & !nse_funcs$is.na(is_fin)
+}
+
+nse_funcs$is.infinite <- function(x) {
+  is_inf <- Expression$create("is_inf", x)  # Expression calling "is_inf" on x
+  # for compatibility with base::is.infinite(), which is FALSE (never NA) for NA_real_
+  is_inf & !nse_funcs$is.na(is_inf)
+}
+
 # as.* type casting functions
 # as.factor() is mapped in expression.R
 nse_funcs$as.character <- function(x) {

diff --git a/r/R/expression.R b/r/R/expression.R
@@ -22,8 +22,7 @@
   "as.factor" = "dictionary_encode",
   "is.na" = "is_null",
   "is.nan" = "is_nan",
-  # nchar is defined in dplyr.R because it is more complex
-  # "nchar" = "utf8_length",
+  # nchar is defined in dplyr-functions.R
   "tolower" = "utf8_lower",
   "toupper" = "utf8_upper",
   # stringr spellings of those

diff --git a/r/R/scalar.R b/r/R/scalar.R
@@ -72,8 +72,5 @@ StructScalar <- R6Class("StructScalar",
 #' @export
 length.Scalar <- function(x) 1L
 
-#' @export
-is.na.Scalar <- function(x) !x$is_valid
-
 #' @export
 sort.Scalar <- function(x, decreasing = FALSE, ...) x
diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R
@@ -108,6 +108,22 @@ test_that("ChunkedArray handles !!! splicing", {
   expect_equal(x$num_chunks, 3L)
 })
 
+test_that("ChunkedArray handles Inf", {
+  data <- list(c(Inf, 2:10), c(1:3, Inf, 5L), 1:10)  # double, double, integer vectors
+  x <- chunked_array(!!!data)
+  expect_equal(x$type, float64())  # the integer chunk ends up as float64 too
+  expect_equal(x$num_chunks, 3L)
+  expect_equal(length(x), 25L)  # 10 + 5 + 10 elements
+  expect_equal(as.vector(x), c(c(Inf, 2:10), c(1:3, Inf, 5), 1:10))
+
+  chunks <- x$chunks
+  expect_equal(as.vector(is.infinite(chunks[[2]])), is.infinite(data[[2]]))  # one chunk
+  expect_equal(  # whole ChunkedArray, checked against base R chunk by chunk
+    as.vector(is.infinite(x)),
+    c(is.infinite(data[[1]]), is.infinite(data[[2]]), is.infinite(data[[3]]))
+  )
+})
+
 test_that("ChunkedArray handles NA", {
   data <- list(1:10, c(NA, 2:10), c(1:3, NA, 5L))
   x <- chunked_array(!!!data)