Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 52 additions & 7 deletions cpp/src/arrow/compute/kernels/scalar_validity.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,20 @@ struct IsValidOperator {
}
};

// Element-wise predicate behind the "is_finite" kernels: reports whether the
// input value is finite according to std::isfinite.
struct IsFiniteOperator {
  // The KernelContext* and Status* arguments are part of the expected call
  // signature but are not used here.
  template <typename OutType, typename InType>
  static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
    return static_cast<OutType>(std::isfinite(value));
  }
};

// Element-wise predicate behind the "is_inf" kernels: reports whether the
// input value is an infinity according to std::isinf.
struct IsInfOperator {
  // The KernelContext* and Status* arguments are part of the expected call
  // signature but are not used here.
  template <typename OutType, typename InType>
  static constexpr OutType Call(KernelContext*, const InType& value, Status*) {
    return static_cast<OutType>(std::isinf(value));
  }
};

struct IsNullOperator {
static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
checked_cast<BooleanScalar*>(out)->value = !in.is_valid;
Expand Down Expand Up @@ -103,19 +117,38 @@ void MakeFunction(std::string name, const FunctionDoc* doc,
DCHECK_OK(registry->AddFunction(std::move(func)));
}

template <typename InType>
void AddIsNanKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
DCHECK_OK(
func->AddKernel({ty}, boolean(),
applicator::ScalarUnary<BooleanType, InType, IsNanOperator>::Exec));
template <typename InType, typename Op>
void AddFloatValidityKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
DCHECK_OK(func->AddKernel({ty}, boolean(),
applicator::ScalarUnary<BooleanType, InType, Op>::Exec));
}

// Creates the unary scalar function called `name`, documented by `doc`, with
// IsFiniteOperator kernels registered for float32 and float64 inputs.
//
// Returns the newly created function; the caller is responsible for adding it
// to a registry.
std::shared_ptr<ScalarFunction> MakeIsFiniteFunction(std::string name,
                                                     const FunctionDoc* doc) {
  // `name` is taken by value and not used again below, so hand it over with
  // std::move instead of passing the lvalue, which would force a second copy
  // of the string when the callee stores it.
  auto func =
      std::make_shared<ScalarFunction>(std::move(name), Arity::Unary(), doc);

  AddFloatValidityKernel<FloatType, IsFiniteOperator>(float32(), func.get());
  AddFloatValidityKernel<DoubleType, IsFiniteOperator>(float64(), func.get());

  return func;
}

// Creates the unary scalar function called `name`, documented by `doc`, with
// IsInfOperator kernels registered for float32 and float64 inputs.
//
// Returns the newly created function; the caller is responsible for adding it
// to a registry.
std::shared_ptr<ScalarFunction> MakeIsInfFunction(std::string name,
                                                  const FunctionDoc* doc) {
  // `name` is taken by value and not used again below, so hand it over with
  // std::move instead of passing the lvalue, which would force a second copy
  // of the string when the callee stores it.
  auto func =
      std::make_shared<ScalarFunction>(std::move(name), Arity::Unary(), doc);

  AddFloatValidityKernel<FloatType, IsInfOperator>(float32(), func.get());
  AddFloatValidityKernel<DoubleType, IsInfOperator>(float64(), func.get());

  return func;
}

std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
const FunctionDoc* doc) {
auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);

AddIsNanKernel<FloatType>(float32(), func.get());
AddIsNanKernel<DoubleType>(float64(), func.get());
AddFloatValidityKernel<FloatType, IsNanOperator>(float32(), func.get());
AddFloatValidityKernel<DoubleType, IsNanOperator>(float64(), func.get());

return func;
}
Expand Down Expand Up @@ -159,6 +192,16 @@ const FunctionDoc is_valid_doc(
"Return true if non-null",
("For each input value, emit true iff the value is valid (non-null)."), {"values"});

// User-facing documentation for the "is_finite" function: summary line,
// longer description, and the name of its single argument.
const FunctionDoc is_finite_doc(
    "Return true if value is finite",
    "For each input value, emit true iff the value is finite (not NaN, inf, or -inf).",
    {"values"});

// User-facing documentation for the "is_inf" function: summary line, longer
// description, and the name of its single argument.
const FunctionDoc is_inf_doc(
    "Return true if infinity",
    "For each input value, emit true iff the value is infinite (inf or -inf).",
    {"values"});

// Documentation record named for the null check: summary line, longer
// description, and the name of its single argument.
const FunctionDoc is_null_doc(
    "Return true if null",
    "For each input value, emit true iff the value is null.",
    {"values"});
Expand All @@ -177,6 +220,8 @@ void RegisterScalarValidity(FunctionRegistry* registry) {
registry, MemAllocation::PREALLOCATE,
/*can_write_into_slices=*/true);

DCHECK_OK(registry->AddFunction(MakeIsFiniteFunction("is_finite", &is_finite_doc)));
DCHECK_OK(registry->AddFunction(MakeIsInfFunction("is_inf", &is_inf_doc)));
DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc)));
}

Expand Down
108 changes: 104 additions & 4 deletions cpp/src/arrow/compute/kernels/scalar_validity_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,107 @@ TEST_F(TestBooleanValidityKernels, ScalarIsNull) {
CheckScalarUnary("is_null", MakeNullScalar(float64()), MakeScalar(true));
}

TEST_F(TestFloatValidityKernels, FloatArrayIsFinite) {
  // Only infinities of either sign: nothing is finite
  const auto only_inf = ArrayFromJSON(float32(), "[Inf, -Inf, Inf, -Inf, Inf]");
  CheckScalarUnary("is_finite", only_inf,
                   ArrayFromJSON(boolean(), "[false, false, false, false, false]"));

  // No infinities: ordinary numbers are finite, NaN is not, null stays null
  const auto without_inf = ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, NaN, null]");
  CheckScalarUnary("is_finite", without_inf,
                   ArrayFromJSON(boolean(), "[true, true, true, true, false, null]"));

  // Infinities mixed with ordinary numbers, NaN and null
  const auto mixed = ArrayFromJSON(float32(), "[0.0, Inf, 2.0, -Inf, NaN, null]");
  CheckScalarUnary("is_finite", mixed,
                   ArrayFromJSON(boolean(), "[true, false, true, false, false, null]"));
}

TEST_F(TestDoubleValidityKernels, DoubleArrayIsFinite) {
  // Only infinities of either sign: nothing is finite
  const auto only_inf = ArrayFromJSON(float64(), "[Inf, -Inf, Inf, -Inf, Inf]");
  CheckScalarUnary("is_finite", only_inf,
                   ArrayFromJSON(boolean(), "[false, false, false, false, false]"));

  // No infinities: ordinary numbers are finite, NaN is not, null stays null
  const auto without_inf = ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, NaN, null]");
  CheckScalarUnary("is_finite", without_inf,
                   ArrayFromJSON(boolean(), "[true, true, true, true, false, null]"));

  // Infinities mixed with ordinary numbers, NaN and null
  const auto mixed = ArrayFromJSON(float64(), "[0.0, Inf, 2.0, -Inf, NaN, null]");
  CheckScalarUnary("is_finite", mixed,
                   ArrayFromJSON(boolean(), "[true, false, true, false, false, null]"));
}

TEST_F(TestFloatValidityKernels, FloatScalarIsFinite) {
  const float pos_inf = std::numeric_limits<float>::infinity();

  // A null input produces a null boolean
  CheckScalarUnary("is_finite", MakeNullScalar(float32()), MakeNullScalar(boolean()));
  // An ordinary number is finite
  CheckScalarUnary("is_finite", MakeScalar(42.0f), MakeScalar(true));
  // NaN and both infinities are not finite
  CheckScalarUnary("is_finite", MakeScalar(std::nanf("")), MakeScalar(false));
  CheckScalarUnary("is_finite", MakeScalar(pos_inf), MakeScalar(false));
  CheckScalarUnary("is_finite", MakeScalar(-pos_inf), MakeScalar(false));
}

TEST_F(TestDoubleValidityKernels, DoubleScalarIsFinite) {
  const double pos_inf = std::numeric_limits<double>::infinity();

  // A null input produces a null boolean
  CheckScalarUnary("is_finite", MakeNullScalar(float64()), MakeNullScalar(boolean()));
  // An ordinary number is finite
  CheckScalarUnary("is_finite", MakeScalar(42.0), MakeScalar(true));
  // NaN and both infinities are not finite
  CheckScalarUnary("is_finite", MakeScalar(std::nan("")), MakeScalar(false));
  CheckScalarUnary("is_finite", MakeScalar(pos_inf), MakeScalar(false));
  CheckScalarUnary("is_finite", MakeScalar(-pos_inf), MakeScalar(false));
}

TEST_F(TestFloatValidityKernels, FloatArrayIsInf) {
  // Only infinities of either sign: every element matches
  const auto only_inf = ArrayFromJSON(float32(), "[Inf, -Inf, Inf, -Inf, Inf]");
  CheckScalarUnary("is_inf", only_inf,
                   ArrayFromJSON(boolean(), "[true, true, true, true, true]"));

  // No infinities: numbers and NaN give false, null stays null
  const auto without_inf = ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, NaN, null]");
  CheckScalarUnary("is_inf", without_inf,
                   ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));

  // Infinities mixed with ordinary numbers, NaN and null
  const auto mixed = ArrayFromJSON(float32(), "[0.0, Inf, 2.0, -Inf, NaN, null]");
  CheckScalarUnary("is_inf", mixed,
                   ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
}

TEST_F(TestDoubleValidityKernels, DoubleArrayIsInf) {
  // Only infinities of either sign: every element matches
  const auto only_inf = ArrayFromJSON(float64(), "[Inf, -Inf, Inf, -Inf, Inf]");
  CheckScalarUnary("is_inf", only_inf,
                   ArrayFromJSON(boolean(), "[true, true, true, true, true]"));

  // No infinities: numbers and NaN give false, null stays null
  const auto without_inf = ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, NaN, null]");
  CheckScalarUnary("is_inf", without_inf,
                   ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));

  // Infinities mixed with ordinary numbers, NaN and null
  const auto mixed = ArrayFromJSON(float64(), "[0.0, Inf, 2.0, -Inf, NaN, null]");
  CheckScalarUnary("is_inf", mixed,
                   ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
}

TEST_F(TestFloatValidityKernels, FloatScalarIsInf) {
  const float pos_inf = std::numeric_limits<float>::infinity();

  // A null input produces a null boolean
  CheckScalarUnary("is_inf", MakeNullScalar(float32()), MakeNullScalar(boolean()));
  // Neither an ordinary number nor NaN counts as infinite
  CheckScalarUnary("is_inf", MakeScalar(42.0f), MakeScalar(false));
  CheckScalarUnary("is_inf", MakeScalar(std::nanf("")), MakeScalar(false));
  // Both signs of infinity are detected
  CheckScalarUnary("is_inf", MakeScalar(pos_inf), MakeScalar(true));
  CheckScalarUnary("is_inf", MakeScalar(-pos_inf), MakeScalar(true));
}

TEST_F(TestDoubleValidityKernels, DoubleScalarIsInf) {
  const double pos_inf = std::numeric_limits<double>::infinity();

  // A null input produces a null boolean
  CheckScalarUnary("is_inf", MakeNullScalar(float64()), MakeNullScalar(boolean()));
  // Neither an ordinary number nor NaN counts as infinite
  CheckScalarUnary("is_inf", MakeScalar(42.0), MakeScalar(false));
  CheckScalarUnary("is_inf", MakeScalar(std::nan("")), MakeScalar(false));
  // Both signs of infinity are detected
  CheckScalarUnary("is_inf", MakeScalar(pos_inf), MakeScalar(true));
  CheckScalarUnary("is_inf", MakeScalar(-pos_inf), MakeScalar(true));
}

TEST_F(TestFloatValidityKernels, FloatArrayIsNan) {
// All NaN
CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[NaN, NaN, NaN, NaN, NaN]"),
ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
// No NaN
CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, 4.0, null]"),
CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, 1.0, 2.0, 3.0, Inf, null]"),
ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
// Some NaNs
CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, NaN, 2.0, NaN, 4.0, null]"),
CheckScalarUnary("is_nan", ArrayFromJSON(float32(), "[0.0, NaN, 2.0, NaN, Inf, null]"),
ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
}

Expand All @@ -105,23 +197,31 @@ TEST_F(TestDoubleValidityKernels, DoubleArrayIsNan) {
CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[NaN, NaN, NaN, NaN, NaN]"),
ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
// No NaN
CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, 4.0, null]"),
CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, 3.0, Inf, null]"),
ArrayFromJSON(boolean(), "[false, false, false, false, false, null]"));
// Some NaNs
CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, NaN, 2.0, NaN, 4.0, null]"),
CheckScalarUnary("is_nan", ArrayFromJSON(float64(), "[0.0, NaN, 2.0, NaN, Inf, null]"),
ArrayFromJSON(boolean(), "[false, true, false, true, false, null]"));
}

TEST_F(TestFloatValidityKernels, FloatScalarIsNan) {
  const float pos_inf = std::numeric_limits<float>::infinity();

  // A null input produces a null boolean
  CheckScalarUnary("is_nan", MakeNullScalar(float32()), MakeNullScalar(boolean()));
  // An ordinary number is not NaN, NaN itself is
  CheckScalarUnary("is_nan", MakeScalar(42.0f), MakeScalar(false));
  CheckScalarUnary("is_nan", MakeScalar(std::nanf("")), MakeScalar(true));
  // Infinities of either sign must not be mistaken for NaN
  CheckScalarUnary("is_nan", MakeScalar(pos_inf), MakeScalar(false));
  CheckScalarUnary("is_nan", MakeScalar(-pos_inf), MakeScalar(false));
}

TEST_F(TestDoubleValidityKernels, DoubleScalarIsNan) {
  const double pos_inf = std::numeric_limits<double>::infinity();

  // A null input produces a null boolean
  CheckScalarUnary("is_nan", MakeNullScalar(float64()), MakeNullScalar(boolean()));
  // An ordinary number is not NaN, NaN itself is
  CheckScalarUnary("is_nan", MakeScalar(42.0), MakeScalar(false));
  CheckScalarUnary("is_nan", MakeScalar(std::nan("")), MakeScalar(true));
  // Infinities of either sign must not be mistaken for NaN
  CheckScalarUnary("is_nan", MakeScalar(pos_inf), MakeScalar(false));
  CheckScalarUnary("is_nan", MakeScalar(-pos_inf), MakeScalar(false));
}

} // namespace compute
Expand Down
29 changes: 19 additions & 10 deletions docs/source/cpp/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -638,31 +638,40 @@ Structural transforms
+==========================+============+================================================+=====================+=========+
| fill_null | Binary | Boolean, Null, Numeric, Temporal, String-like | Input type | \(1) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| is_nan | Unary | Float, Double | Boolean | \(2) |
| is_finite | Unary | Float, Double | Boolean | \(2) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| is_null | Unary | Any | Boolean | \(3) |
| is_inf | Unary | Float, Double | Boolean | \(3) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| is_valid | Unary | Any | Boolean | \(4) |
| is_nan | Unary | Float, Double | Boolean | \(4) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| list_value_length | Unary | List-like | Int32 or Int64 | \(5) |
| is_null | Unary | Any | Boolean | \(5) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| project | Varargs | Any | Struct | \(6) |
| is_valid | Unary | Any | Boolean | \(6) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| list_value_length | Unary | List-like | Int32 or Int64 | \(7) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| project | Varargs | Any | Struct | \(8) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+

* \(1) First input must be an array, second input a scalar of the same type.
Output is an array of the same type as the inputs, and with the same values
as the first input, except for nulls replaced with the second input value.

* \(2) Output is true iff the corresponding input element is NaN.
* \(2) Output is true iff the corresponding input element is finite (not Infinity,
-Infinity, or NaN).

* \(3) Output is true iff the corresponding input element is Infinity/-Infinity.

* \(4) Output is true iff the corresponding input element is NaN.

* \(3) Output is true iff the corresponding input element is null.
* \(5) Output is true iff the corresponding input element is null.

* \(4) Output is true iff the corresponding input element is non-null.
* \(6) Output is true iff the corresponding input element is non-null.

* \(5) Each output element is the length of the corresponding input element
* \(7) Each output element is the length of the corresponding input element
(null if input is null). Output type is Int32 for List, Int64 for LargeList.

* \(6) The output struct's field types are the types of its arguments. The
* \(8) The output struct's field types are the types of its arguments. The
field names are specified using an instance of :struct:`ProjectOptions`.
The output shape will be scalar if all inputs are scalar, otherwise any
scalars will be broadcast to arrays.
Expand Down
3 changes: 3 additions & 0 deletions docs/source/python/api/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,9 @@ Structural Transforms

binary_length
fill_null
is_finite
is_inf
is_nan
is_null
is_valid
list_value_length
Expand Down
3 changes: 2 additions & 1 deletion r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,10 @@ S3method(head,ArrowDatum)
S3method(head,ArrowTabular)
S3method(head,Dataset)
S3method(head,arrow_dplyr_query)
S3method(is.finite,ArrowDatum)
S3method(is.infinite,ArrowDatum)
S3method(is.na,ArrowDatum)
S3method(is.na,Expression)
S3method(is.na,Scalar)
S3method(is.nan,ArrowDatum)
S3method(is_in,ArrowDatum)
S3method(is_in,default)
Expand Down
14 changes: 14 additions & 0 deletions r/R/arrow-datum.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,20 @@ ArrowDatum <- R6Class("ArrowDatum", inherit = ArrowObject,
#' @export
length.ArrowDatum <- function(x) x$length()

#' @export
is.finite.ArrowDatum <- function(x) {
  finite_flags <- call_function("is_finite", x)
  # base::is.finite() answers FALSE (not NA) for NA_real_, so mask the result
  # with its own non-missing positions to stay compatible
  not_missing <- !is.na(finite_flags)
  finite_flags & not_missing
}

#' @export
is.infinite.ArrowDatum <- function(x) {
  infinite_flags <- call_function("is_inf", x)
  # base::is.infinite() answers FALSE (not NA) for NA_real_, so mask the result
  # with its own non-missing positions to stay compatible
  not_missing <- !is.na(infinite_flags)
  infinite_flags & not_missing
}

#' @export
is.na.ArrowDatum <- function(x) {
  # Missingness is answered by the "is_null" compute function
  call_function("is_null", x)
}

Expand Down
12 changes: 12 additions & 0 deletions r/R/dplyr-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,18 @@ nse_funcs$between <- function(x, left, right) {
x >= left & x <= right
}

nse_funcs$is.finite <- function(x) {
  finite_expr <- Expression$create("is_finite", x)
  # base::is.finite() answers FALSE (not NA) for NA_real_, so combine the
  # expression with a check that its own result is not missing
  not_missing <- !nse_funcs$is.na(finite_expr)
  finite_expr & not_missing
}

nse_funcs$is.infinite <- function(x) {
  infinite_expr <- Expression$create("is_inf", x)
  # base::is.infinite() answers FALSE (not NA) for NA_real_, so combine the
  # expression with a check that its own result is not missing
  not_missing <- !nse_funcs$is.na(infinite_expr)
  infinite_expr & not_missing
}

# as.* type casting functions
# as.factor() is mapped in expression.R
nse_funcs$as.character <- function(x) {
Expand Down
3 changes: 1 addition & 2 deletions r/R/expression.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@
"as.factor" = "dictionary_encode",
"is.na" = "is_null",
"is.nan" = "is_nan",
# nchar is defined in dplyr.R because it is more complex
# "nchar" = "utf8_length",
# nchar is defined in dplyr-functions.R
"tolower" = "utf8_lower",
"toupper" = "utf8_upper",
# stringr spellings of those
Expand Down
3 changes: 0 additions & 3 deletions r/R/scalar.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,5 @@ StructScalar <- R6Class("StructScalar",
#' @export
length.Scalar <- function(x) {
  # Always reports a length of exactly one, whatever `x` holds
  1L
}

#' @export
is.na.Scalar <- function(x) !x$is_valid

#' @export
sort.Scalar <- function(x, decreasing = FALSE, ...) {
  # Returns `x` unchanged; `decreasing` and `...` are accepted but not used
  x
}
16 changes: 16 additions & 0 deletions r/tests/testthat/test-chunked-array.R
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,22 @@ test_that("ChunkedArray handles !!! splicing", {
expect_equal(x$num_chunks, 3L)
})

test_that("ChunkedArray handles Inf", {
  # Three chunks: Inf leading the first, Inf inside the second, none in the third
  pieces <- list(c(Inf, 2:10), c(1:3, Inf, 5L), 1:10)
  chunked <- chunked_array(!!!pieces)

  # Type, chunk count, total length and round-tripped values
  expect_equal(chunked$type, float64())
  expect_equal(chunked$num_chunks, 3L)
  expect_equal(length(chunked), 25L)
  expect_equal(as.vector(chunked), c(Inf, 2:10, 1:3, Inf, 5, 1:10))

  # is.infinite() on a single chunk agrees with base R on the same data
  second_chunk <- chunked$chunks[[2]]
  expect_equal(as.vector(is.infinite(second_chunk)), is.infinite(pieces[[2]]))

  # is.infinite() on the whole ChunkedArray agrees with base R chunk by chunk
  expected_flags <- unlist(lapply(pieces, is.infinite))
  expect_equal(as.vector(is.infinite(chunked)), expected_flags)
})

test_that("ChunkedArray handles NA", {
data <- list(1:10, c(NA, 2:10), c(1:3, NA, 5L))
x <- chunked_array(!!!data)
Expand Down
Loading