Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/api_scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ Result<Datum> Compare(const Datum& left, const Datum& right, CompareOptions opti

SCALAR_EAGER_UNARY(IsValid, "is_valid")
SCALAR_EAGER_UNARY(IsNull, "is_null")
SCALAR_EAGER_UNARY(IsNan, "is_nan")

Result<Datum> FillNull(const Datum& values, const Datum& fill_value, ExecContext* ctx) {
return CallFunction("fill_null", {values, fill_value}, ctx);
Expand Down
12 changes: 12 additions & 0 deletions cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,18 @@ Result<Datum> IsValid(const Datum& values, ExecContext* ctx = NULLPTR);
ARROW_EXPORT
Result<Datum> IsNull(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief IsNan returns true for each element of `values` that is NaN,
/// false otherwise
///
/// Kernels are registered for float32 and float64 inputs. A null input
/// element produces a null output element rather than false.
///
/// \param[in] values input to look for NaN
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 3.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> IsNan(const Datum& values, ExecContext* ctx = NULLPTR);

/// \brief FillNull replaces each null element in `values`
/// with `fill_value`
///
Expand Down
32 changes: 32 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_validity.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
// specific language governing permissions and limitations
// under the License.

#include <cmath>

#include "arrow/compute/kernels/common.h"

#include "arrow/util/bit_util.h"
Expand Down Expand Up @@ -74,6 +76,13 @@ struct IsNullOperator {
}
};

// Per-value predicate behind the "is_nan" kernels: reports whether a single
// input value is NaN. The kernel context argument is accepted but not used.
struct IsNanOperator {
  template <typename Out, typename In>
  static constexpr Out Call(KernelContext* /*ctx*/, const In& input) {
    return static_cast<Out>(std::isnan(input));
  }
};

void MakeFunction(std::string name, const FunctionDoc* doc,
std::vector<InputType> in_types, OutputType out_type,
ArrayKernelExec exec, FunctionRegistry* registry,
Expand All @@ -90,6 +99,23 @@ void MakeFunction(std::string name, const FunctionDoc* doc,
DCHECK_OK(registry->AddFunction(std::move(func)));
}

template <typename InType>
void AddIsNanKernel(const std::shared_ptr<DataType>& ty, ScalarFunction* func) {
DCHECK_OK(
func->AddKernel({ty}, boolean(),
applicator::ScalarUnary<BooleanType, InType, IsNanOperator>::Exec));
}

std::shared_ptr<ScalarFunction> MakeIsNanFunction(std::string name,
const FunctionDoc* doc) {
auto func = std::make_shared<ScalarFunction>(name, Arity::Unary(), doc);

AddIsNanKernel<FloatType>(float32(), func.get());
AddIsNanKernel<DoubleType>(float64(), func.get());

return func;
}

void IsValidExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const Datum& arg0 = batch[0];
if (arg0.type()->id() == Type::NA) {
Expand Down Expand Up @@ -132,6 +158,10 @@ const FunctionDoc is_null_doc("Return true if null",
("For each input value, emit true iff the value is null."),
{"values"});

// User-facing documentation for the "is_nan" function: a one-line summary,
// a longer description, and the name of its single argument.
const FunctionDoc is_nan_doc("Return true if NaN",
("For each input value, emit true iff the value is NaN."),
{"values"});

} // namespace

void RegisterScalarValidity(FunctionRegistry* registry) {
Expand All @@ -141,6 +171,8 @@ void RegisterScalarValidity(FunctionRegistry* registry) {
MakeFunction("is_null", &is_null_doc, {ValueDescr::ANY}, boolean(), IsNullExec,
registry, MemAllocation::PREALLOCATE,
/*can_write_into_slices=*/true);

DCHECK_OK(registry->AddFunction(MakeIsNanFunction("is_nan", &is_nan_doc)));
}

} // namespace internal
Expand Down
59 changes: 48 additions & 11 deletions cpp/src/arrow/compute/kernels/scalar_validity_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,61 +31,98 @@
namespace arrow {
namespace compute {

template <typename ArrowType>
class TestValidityKernels : public ::testing::Test {
protected:
// XXX Since IsValid and IsNull don't touch any buffers but the null bitmap
// testing multiple types seems redundant.
using ArrowType = BooleanType;

static std::shared_ptr<DataType> type_singleton() {
return TypeTraits<ArrowType>::type_singleton();
}
};

TEST_F(TestValidityKernels, ArrayIsValid) {
using TestBooleanValidityKernels = TestValidityKernels<BooleanType>;
using TestFloatValidityKernels = TestValidityKernels<FloatType>;
using TestDoubleValidityKernels = TestValidityKernels<DoubleType>;

TEST_F(TestBooleanValidityKernels, ArrayIsValid) {
CheckScalarUnary("is_valid", type_singleton(), "[]", type_singleton(), "[]");
CheckScalarUnary("is_valid", type_singleton(), "[null]", type_singleton(), "[false]");
CheckScalarUnary("is_valid", type_singleton(), "[1]", type_singleton(), "[true]");
CheckScalarUnary("is_valid", type_singleton(), "[null, 1, 0, null]", type_singleton(),
"[false, true, true, false]");
}

TEST_F(TestValidityKernels, IsValidIsNullNullType) {
TEST_F(TestBooleanValidityKernels, IsValidIsNullNullType) {
CheckScalarUnary("is_null", std::make_shared<NullArray>(5),
ArrayFromJSON(boolean(), "[true, true, true, true, true]"));
CheckScalarUnary("is_valid", std::make_shared<NullArray>(5),
ArrayFromJSON(boolean(), "[false, false, false, false, false]"));
}

TEST_F(TestValidityKernels, ArrayIsValidBufferPassthruOptimization) {
TEST_F(TestBooleanValidityKernels, ArrayIsValidBufferPassthruOptimization) {
Datum arg = ArrayFromJSON(boolean(), "[null, 1, 0, null]");
ASSERT_OK_AND_ASSIGN(auto validity, arrow::compute::IsValid(arg));
ASSERT_EQ(validity.array()->buffers[1], arg.array()->buffers[0]);
}

TEST_F(TestValidityKernels, ScalarIsValid) {
TEST_F(TestBooleanValidityKernels, ScalarIsValid) {
CheckScalarUnary("is_valid", MakeScalar(19.7), MakeScalar(true));
CheckScalarUnary("is_valid", MakeNullScalar(float64()), MakeScalar(false));
}

TEST_F(TestValidityKernels, ArrayIsNull) {
TEST_F(TestBooleanValidityKernels, ArrayIsNull) {
CheckScalarUnary("is_null", type_singleton(), "[]", type_singleton(), "[]");
CheckScalarUnary("is_null", type_singleton(), "[null]", type_singleton(), "[true]");
CheckScalarUnary("is_null", type_singleton(), "[1]", type_singleton(), "[false]");
CheckScalarUnary("is_null", type_singleton(), "[null, 1, 0, null]", type_singleton(),
"[true, false, false, true]");
}

TEST_F(TestValidityKernels, IsNullSetsZeroNullCount) {
TEST_F(TestBooleanValidityKernels, IsNullSetsZeroNullCount) {
auto arr = ArrayFromJSON(int32(), "[1, 2, 3, 4]");
std::shared_ptr<ArrayData> result = (*IsNull(arr)).array();
ASSERT_EQ(result->null_count, 0);
}

TEST_F(TestValidityKernels, ScalarIsNull) {
TEST_F(TestBooleanValidityKernels, ScalarIsNull) {
CheckScalarUnary("is_null", MakeScalar(19.7), MakeScalar(false));
CheckScalarUnary("is_null", MakeNullScalar(float64()), MakeScalar(true));
}

TEST_F(TestFloatValidityKernels, FloatArrayIsNan) {
  // Each case pairs a float32 JSON input with the boolean JSON output that
  // "is_nan" is expected to produce for it.
  const struct {
    const char* values;
    const char* expected;
  } cases[] = {
      // All NaN
      {"[NaN, NaN, NaN, NaN, NaN]", "[true, true, true, true, true]"},
      // No NaN
      {"[0.0, 1.0, 2.0, 3.0, 4.0, null]", "[false, false, false, false, false, null]"},
      // Some NaNs
      {"[0.0, NaN, 2.0, NaN, 4.0, null]", "[false, true, false, true, false, null]"},
  };

  for (const auto& test_case : cases) {
    CheckScalarUnary("is_nan", ArrayFromJSON(float32(), test_case.values),
                     ArrayFromJSON(boolean(), test_case.expected));
  }
}

TEST_F(TestDoubleValidityKernels, DoubleArrayIsNan) {
  // Each case pairs a float64 JSON input with the boolean JSON output that
  // "is_nan" is expected to produce for it.
  const struct {
    const char* values;
    const char* expected;
  } cases[] = {
      // All NaN
      {"[NaN, NaN, NaN, NaN, NaN]", "[true, true, true, true, true]"},
      // No NaN
      {"[0.0, 1.0, 2.0, 3.0, 4.0, null]", "[false, false, false, false, false, null]"},
      // Some NaNs
      {"[0.0, NaN, 2.0, NaN, 4.0, null]", "[false, true, false, true, false, null]"},
  };

  for (const auto& test_case : cases) {
    CheckScalarUnary("is_nan", ArrayFromJSON(float64(), test_case.values),
                     ArrayFromJSON(boolean(), test_case.expected));
  }
}

TEST_F(TestFloatValidityKernels, FloatScalarIsNan) {
  // A null float32 scalar maps to a null boolean scalar.
  const auto null_input = MakeNullScalar(float32());
  const auto null_expected = MakeNullScalar(boolean());
  CheckScalarUnary("is_nan", null_input, null_expected);

  // An ordinary value is reported as not NaN.
  const float regular_value = 42.0f;
  CheckScalarUnary("is_nan", MakeScalar(regular_value), MakeScalar(false));

  // A NaN obtained from std::nanf is reported as NaN.
  const float nan_value = std::nanf("");
  CheckScalarUnary("is_nan", MakeScalar(nan_value), MakeScalar(true));
}

TEST_F(TestDoubleValidityKernels, DoubleScalarIsNan) {
  // A null float64 scalar maps to a null boolean scalar.
  const auto null_input = MakeNullScalar(float64());
  const auto null_expected = MakeNullScalar(boolean());
  CheckScalarUnary("is_nan", null_input, null_expected);

  // An ordinary value is reported as not NaN.
  const double regular_value = 42.0;
  CheckScalarUnary("is_nan", MakeScalar(regular_value), MakeScalar(false));

  // A NaN obtained from std::nan is reported as NaN.
  const double nan_value = std::nan("");
  CheckScalarUnary("is_nan", MakeScalar(nan_value), MakeScalar(true));
}

} // namespace compute
} // namespace arrow
14 changes: 9 additions & 5 deletions docs/source/cpp/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -453,22 +453,26 @@ Structural transforms
+==========================+============+================================================+=====================+=========+
| fill_null | Binary | Boolean, Null, Numeric, Temporal, String-like | Input type | \(1) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| is_null | Unary | Any | Boolean | \(2) |
| is_nan | Unary | Float, Double | Boolean | \(2) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| is_valid | Unary | Any | Boolean | \(2) |
| is_null | Unary | Any | Boolean | \(3) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| list_value_length | Unary | List-like | Int32 or Int64 | \(4) |
| is_valid | Unary | Any | Boolean | \(4) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| list_value_length | Unary | List-like | Int32 or Int64 | \(5) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+

* \(1) First input must be an array, second input a scalar of the same type.
Output is an array of the same type as the inputs, and with the same values
as the first input, except for nulls replaced with the second input value.

* \(2) Output is true iff the corresponding input element is non-null.
* \(2) Output is true iff the corresponding input element is NaN.

* \(3) Output is true iff the corresponding input element is null.

* \(4) Each output element is the length of the corresponding input element
* \(4) Output is true iff the corresponding input element is non-null.

* \(5) Each output element is the length of the corresponding input element
(null if input is null). Output type is Int32 for List, Int64 for LargeList.

Conversions
Expand Down