From ea4629cded68384981845c77f38564a9a6ac3adc Mon Sep 17 00:00:00 2001
From: David Li
Date: Mon, 24 May 2021 14:04:52 -0400
Subject: [PATCH 01/14] ARROW-12751: [C++] Implement minimum/maximum kernels
---
cpp/src/arrow/compute/api_scalar.cc | 10 +
cpp/src/arrow/compute/api_scalar.h | 23 ++
.../compute/kernels/scalar_arithmetic.cc | 303 ++++++++++++++++++
.../compute/kernels/scalar_arithmetic_test.cc | 299 +++++++++++++++++
docs/source/cpp/compute.rst | 14 +
docs/source/python/api/compute.rst | 8 +
6 files changed, 657 insertions(+)
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index 105ba7a0589..f2fdeb5eead 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -63,6 +63,16 @@ SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked")
SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked")
SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
+Result Maximum(const std::vector& args, MinMaxOptions options,
+ ExecContext* ctx) {
+ return CallFunction("maximum", args, &options, ctx);
+}
+
+Result Minimum(const std::vector& args, MinMaxOptions options,
+ ExecContext* ctx) {
+ return CallFunction("minimum", args, &options, ctx);
+}
+
// ----------------------------------------------------------------------
// Set-related operations
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 0a05b123a44..e389e6d344e 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -42,6 +42,11 @@ struct ArithmeticOptions : public FunctionOptions {
bool check_overflow;
};
+struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
+ MinMaxOptions() : skip_nulls(true) {}
+ bool skip_nulls;
+};
+
struct ARROW_EXPORT MatchSubstringOptions : public FunctionOptions {
explicit MatchSubstringOptions(std::string pattern, bool ignore_case = false)
: pattern(std::move(pattern)), ignore_case(ignore_case) {}
@@ -253,6 +258,24 @@ Result Power(const Datum& left, const Datum& right,
ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);
+/// \brief
+///
+/// \param[in] args
+/// \param[in] ctx the function execution context, optional
+/// \return
+ARROW_EXPORT
+Result Maximum(const std::vector& args, MinMaxOptions options = {},
+ ExecContext* ctx = NULLPTR);
+
+/// \brief
+///
+/// \param[in] args
+/// \param[in] ctx the function execution context, optional
+/// \return
+ARROW_EXPORT
+Result Minimum(const std::vector& args, MinMaxOptions options = {},
+ ExecContext* ctx = NULLPTR);
+
/// \brief Compare a numeric array with a scalar.
///
/// \param[in] left datum to compare, must be an Array
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 743d2e3fc0e..2501517fb80 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -18,8 +18,10 @@
#include
#include
+#include "arrow/compute/api_scalar.h"
#include "arrow/compute/kernels/common.h"
#include "arrow/type_traits.h"
+#include "arrow/util/bitmap_ops.h"
#include "arrow/util/int_util_internal.h"
#include "arrow/util/macros.h"
@@ -397,6 +399,30 @@ struct PowerChecked {
}
};
+struct Minimum {
+ template
+ static enable_if_floating_point Call(T left, T right) {
+ return std::fmin(left, right);
+ }
+
+ template
+ static enable_if_integer Call(T left, T right) {
+ return std::min(left, right);
+ }
+};
+
+struct Maximum {
+ template
+ static enable_if_floating_point Call(T left, T right) {
+ return std::fmax(left, right);
+ }
+
+ template
+ static enable_if_integer Call(T left, T right) {
+ return std::max(left, right);
+ }
+};
+
// Generate a kernel given an arithmetic functor
template class KernelGenerator, typename Op>
ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
@@ -428,6 +454,45 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
}
}
+template class KernelGenerator, typename Op>
+ArrayKernelExec MinMaxExecFromOp(detail::GetTypeId get_id) {
+ switch (get_id.id) {
+ case Type::INT8:
+ return KernelGenerator::Exec;
+ case Type::UINT8:
+ return KernelGenerator::Exec;
+ case Type::INT16:
+ return KernelGenerator::Exec;
+ case Type::UINT16:
+ return KernelGenerator::Exec;
+ case Type::INT32:
+ return KernelGenerator::Exec;
+ case Type::DATE32:
+ return KernelGenerator::Exec;
+ case Type::TIME32:
+ return KernelGenerator::Exec;
+ case Type::UINT32:
+ return KernelGenerator::Exec;
+ case Type::INT64:
+ return KernelGenerator::Exec;
+ case Type::TIMESTAMP:
+ return KernelGenerator::Exec;
+ case Type::DATE64:
+ return KernelGenerator::Exec;
+ case Type::TIME64:
+ return KernelGenerator::Exec;
+ case Type::UINT64:
+ return KernelGenerator::Exec;
+ case Type::FLOAT:
+ return KernelGenerator::Exec;
+ case Type::DOUBLE:
+ return KernelGenerator::Exec;
+ default:
+ DCHECK(false);
+ return ExecFail;
+ }
+}
+
struct ArithmeticFunction : ScalarFunction {
using ScalarFunction::ScalarFunction;
@@ -453,6 +518,26 @@ struct ArithmeticFunction : ScalarFunction {
}
};
+struct ArithmeticVarArgsFunction : ScalarFunction {
+ using ScalarFunction::ScalarFunction;
+
+ Result DispatchBest(std::vector* values) const override {
+ RETURN_NOT_OK(CheckArity(*values));
+
+ using arrow::compute::detail::DispatchExactImpl;
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+
+ EnsureDictionaryDecoded(values);
+
+ if (auto type = CommonNumeric(*values)) {
+ ReplaceTypes(type, values);
+ }
+
+ if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
+ return arrow::compute::detail::NoMatchingKernel(this, *values);
+ }
+};
+
template
std::shared_ptr MakeArithmeticFunction(std::string name,
const FunctionDoc* doc) {
@@ -516,6 +601,203 @@ std::shared_ptr MakeUnarySignedArithmeticFunctionNotNull(
return func;
}
+using MinMaxState = OptionsWrapper;
+
+// Implement a variadic scalar min/max kernel.
+template
+struct ScalarMinMax {
+ using OutValue = typename GetOutputType::T;
+
+ static void ExecScalar(const ExecBatch& batch, const MinMaxOptions& options,
+ Datum* out) {
+ // All arguments are scalar
+ OutValue value{};
+ bool valid = false;
+ for (const auto arg : batch.values) {
+ const auto& scalar = *arg.scalar();
+ if (!scalar.is_valid) {
+ if (options.skip_nulls) continue;
+ out->scalar()->is_valid = false;
+ return;
+ }
+ if (!valid) {
+ value = UnboxScalar::Unbox(scalar);
+ valid = true;
+ } else {
+ value = Op::Call(value, UnboxScalar::Unbox(scalar));
+ }
+ }
+ out->scalar()->is_valid = valid;
+ if (valid) {
+ BoxScalar::Box(value, out->scalar().get());
+ }
+ }
+
+ static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+ const MinMaxOptions& options = MinMaxState::Get(ctx);
+ const auto descrs = batch.GetDescriptors();
+ const bool all_scalar =
+ std::all_of(batch.values.begin(), batch.values.end(),
+ [](const Datum& d) { return d.descr().shape == ValueDescr::SCALAR; });
+ if (all_scalar) {
+ ExecScalar(batch, options, out);
+ return Status::OK();
+ }
+
+ ArrayData* output = out->mutable_array();
+
+ // Exactly one array (output = input)
+ if (batch.values.size() == 1) {
+ *output = *batch[0].array();
+ return Status::OK();
+ }
+
+ // At least one array, two or more arguments
+ int64_t length = 0;
+ for (const auto arg : batch.values) {
+ if (arg.is_array()) {
+ length = arg.array()->length;
+ break;
+ }
+ }
+
+ if (!options.skip_nulls) {
+ // We can precompute the validity buffer in this case
+ // If output will be all null, just return
+ for (auto arg : batch.values) {
+ if (arg.is_scalar() && !arg.scalar()->is_valid) {
+ ARROW_ASSIGN_OR_RAISE(
+ auto array, MakeArrayFromScalar(*arg.scalar(), length, ctx->memory_pool()));
+ *output = *array->data();
+ return Status::OK();
+ } else if (arg.is_array() && arg.array()->null_count == length) {
+ *output = *arg.array();
+ return Status::OK();
+ }
+ }
+ // AND together the validity buffers of all arrays
+ ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(length));
+ bool first = true;
+ for (auto arg : batch.values) {
+ if (!arg.is_array()) continue;
+ auto arr = arg.array();
+ if (!arr->buffers[0]) continue;
+ if (first) {
+ ::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset, length,
+ output->buffers[0]->mutable_data(),
+ /*dest_offset=*/0);
+ first = false;
+ } else {
+ ::arrow::internal::BitmapAnd(
+ output->buffers[0]->data(), /*left_offset=*/0, arr->buffers[0]->data(),
+ arr->offset, length, /*out_offset=*/0, output->buffers[0]->mutable_data());
+ }
+ }
+ }
+
+ if (batch.values[0].is_scalar()) {
+ // Promote to output array
+ ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*batch.values[0].scalar(),
+ length, ctx->memory_pool()));
+ *output = *array->data();
+ if (!batch.values[0].scalar()->is_valid) {
+ // MakeArrayFromScalar reuses the same buffer for null/data in
+ // this case, allocate a real one since we'll be writing to it
+ ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(length));
+ ::arrow::internal::BitmapXor(output->buffers[0]->data(), /*left_offset=*/0,
+ output->buffers[0]->data(), /*right_offset=*/0,
+ length, /*out_offset=*/0,
+ output->buffers[0]->mutable_data());
+ }
+ } else {
+ // Copy to output array
+ const ArrayData& input = *batch.values[0].array();
+ ARROW_ASSIGN_OR_RAISE(output->buffers[1], ctx->Allocate(length * sizeof(OutValue)));
+ if (options.skip_nulls && input.buffers[0]) {
+ ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(length));
+ ::arrow::internal::CopyBitmap(input.buffers[0]->data(), input.offset, length,
+ output->buffers[0]->mutable_data(),
+ /*dest_offset=*/0);
+ }
+ // This won't work for nested or variable-sized types
+ std::memcpy(output->buffers[1]->mutable_data(),
+ input.buffers[1]->data() + (input.offset * sizeof(OutValue)),
+ length * sizeof(OutValue));
+ }
+
+ for (size_t i = 1; i < batch.values.size(); i++) {
+ OutputArrayWriter writer(out->mutable_array());
+ if (batch.values[i].is_scalar()) {
+ auto scalar = batch.values[i].scalar();
+ if (!scalar->is_valid) continue;
+ auto value = UnboxScalar::Unbox(*scalar);
+ VisitArrayValuesInline(
+ *out->array(), [&](OutValue u) { writer.Write(Op::Call(u, value)); },
+ [&]() { writer.Write(value); });
+ if (options.skip_nulls && output->buffers[0]) output->buffers[0] = nullptr;
+ } else {
+ const ArrayData& arr = *batch.values[i].array();
+ ArrayIterator out_it(*output);
+ int64_t index = 0;
+ VisitArrayValuesInline(
+ arr,
+ [&](OutValue value) {
+ auto u = out_it();
+ if (!output->buffers[0] ||
+ BitUtil::GetBit(output->buffers[0]->data(), index)) {
+ writer.Write(Op::Call(u, value));
+ } else {
+ writer.Write(value);
+ }
+ index++;
+ },
+ [&]() {
+ // RHS is null, preserve the LHS
+ writer.values++;
+ index++;
+ out_it();
+ });
+ // When not skipping nulls, we pre-compute the validity buffer
+ if (options.skip_nulls && output->buffers[0]) {
+ if (arr.buffers[0]) {
+ ::arrow::internal::BitmapOr(
+ output->buffers[0]->data(), /*left_offset=*/0, arr.buffers[0]->data(),
+ /*right_offset=*/arr.offset, length, /*out_offset=*/0,
+ output->buffers[0]->mutable_data());
+ } else {
+ output->buffers[0] = nullptr;
+ }
+ }
+ }
+ }
+ output->null_count = -1;
+ return Status::OK();
+ }
+};
+
+template
+std::shared_ptr MakeScalarMinMax(std::string name,
+ const FunctionDoc* doc) {
+ auto func = std::make_shared(name, Arity::VarArgs(), doc);
+ for (const auto& ty : NumericTypes()) {
+ auto exec = MinMaxExecFromOp(ty);
+ ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
+ MinMaxState::Init};
+ kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::type::NO_PREALLOCATE;
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+ }
+ for (const auto& ty : TemporalTypes()) {
+ auto exec = MinMaxExecFromOp(ty);
+ ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
+ MinMaxState::Init};
+ kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
+ kernel.mem_allocation = MemAllocation::type::NO_PREALLOCATE;
+ DCHECK_OK(func->AddKernel(std::move(kernel)));
+ }
+ return func;
+}
+
const FunctionDoc absolute_value_doc{
"Calculate the absolute value of the argument element-wise",
("Results will wrap around on integer overflow.\n"
@@ -602,6 +884,20 @@ const FunctionDoc pow_checked_doc{
("An error is returned when integer to negative integer power is encountered,\n"
"or integer overflow is encountered."),
{"base", "exponent"}};
+
+const FunctionDoc minimum_doc{
+ "Find the element-wise minimum value",
+ ("Nulls will be ignored (default) or propagated. "
+ "NaN will be taken over null, but not over any valid float."),
+ {"*args"},
+ "MinMaxOptions"};
+
+const FunctionDoc maximum_doc{
+ "Find the element-wise maximum value",
+ ("Nulls will be ignored (default) or propagated. "
+ "NaN will be taken over null, but not over any valid float."),
+ {"*args"},
+ "MinMaxOptions"};
} // namespace
void RegisterScalarArithmetic(FunctionRegistry* registry) {
@@ -677,6 +973,13 @@ void RegisterScalarArithmetic(FunctionRegistry* registry) {
auto power_checked =
MakeArithmeticFunctionNotNull("power_checked", &pow_checked_doc);
DCHECK_OK(registry->AddFunction(std::move(power_checked)));
+
+ // ----------------------------------------------------------------------
+ auto minimum = MakeScalarMinMax("minimum", &minimum_doc);
+ DCHECK_OK(registry->AddFunction(std::move(minimum)));
+
+ auto maximum = MakeScalarMinMax("maximum", &maximum_doc);
+ DCHECK_OK(registry->AddFunction(std::move(maximum)));
}
} // namespace internal
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index ff66fcf1d12..4ce85b93db6 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -17,6 +17,7 @@
#include
#include
+#include
#include
#include
#include
@@ -312,6 +313,88 @@ class TestBinaryArithmeticUnsigned : public TestBinaryArithmeticIntegral {};
template
class TestBinaryArithmeticFloating : public TestBinaryArithmetic {};
+template
+class TestVarArgsArithmetic : public TestBase {
+ protected:
+ static std::shared_ptr type_singleton() {
+ return TypeTraits::type_singleton();
+ }
+
+ using VarArgsFunction = std::function(const std::vector&,
+ MinMaxOptions, ExecContext*)>;
+
+ Datum scalar(const std::string& value) {
+ return ScalarFromJSON(type_singleton(), value);
+ }
+
+ Datum array(const std::string& value) { return ArrayFromJSON(type_singleton(), value); }
+
+ Datum Eval(VarArgsFunction func, const std::vector& args) {
+ EXPECT_OK_AND_ASSIGN(auto actual, func(args, min_max_options_, nullptr));
+ if (actual.is_array()) {
+ auto arr = actual.make_array();
+ ARROW_EXPECT_OK(arr->ValidateFull());
+ }
+ return actual;
+ }
+
+ void AssertNullScalar(VarArgsFunction func, const std::vector& args) {
+ auto datum = this->Eval(func, args);
+ ASSERT_TRUE(datum.is_scalar());
+ ASSERT_FALSE(datum.scalar()->is_valid);
+ }
+
+ void Assert(VarArgsFunction func, Datum expected, const std::vector& args) {
+ std::stringstream ss;
+ ss << "Inputs:";
+ for (const auto& arg : args) {
+ ss << ' ';
+ if (arg.is_scalar())
+ ss << arg.scalar()->ToString();
+ else if (arg.is_array())
+ ss << arg.make_array()->ToString();
+ else
+ ss << arg.ToString();
+ }
+ SCOPED_TRACE(ss.str());
+
+ auto actual = Eval(func, args);
+ AssertDatumsApproxEqual(expected, actual, /*verbose=*/true, equal_options_);
+ }
+
+ void SetNansEqual(bool value = true) {
+ this->equal_options_ = equal_options_.nans_equal(value);
+ }
+
+ EqualOptions equal_options_ = EqualOptions::Defaults();
+ MinMaxOptions min_max_options_;
+};
+
+template
+class TestVarArgsArithmeticNumeric : public TestVarArgsArithmetic {};
+
+template
+class TestVarArgsArithmeticFloating : public TestVarArgsArithmetic {};
+
+template
+class TestVarArgsArithmeticParametricTemporal : public TestVarArgsArithmetic {
+ protected:
+ static std::shared_ptr type_singleton() {
+ // Time32 requires second/milli, Time64 requires nano/micro
+ if (TypeTraits::bytes_required(1) == 4) {
+ return std::make_shared(TimeUnit::type::SECOND);
+ } else {
+ return std::make_shared(TimeUnit::type::NANO);
+ }
+ }
+
+ Datum scalar(const std::string& value) {
+ return ScalarFromJSON(type_singleton(), value);
+ }
+
+ Datum array(const std::string& value) { return ArrayFromJSON(type_singleton(), value); }
+};
+
// InputType - OutputType pairs
using IntegralTypes = testing::Types;
@@ -324,6 +407,12 @@ using UnsignedIntegerTypes =
// TODO(kszucs): add half-float
using FloatingTypes = testing::Types;
+using NumericBasedTypes =
+ ::testing::Types;
+
+using ParametricTemporalTypes = ::testing::Types;
+
TYPED_TEST_SUITE(TestUnaryArithmeticIntegral, IntegralTypes);
TYPED_TEST_SUITE(TestUnaryArithmeticSigned, SignedIntegerTypes);
TYPED_TEST_SUITE(TestUnaryArithmeticUnsigned, UnsignedIntegerTypes);
@@ -334,6 +423,10 @@ TYPED_TEST_SUITE(TestBinaryArithmeticSigned, SignedIntegerTypes);
TYPED_TEST_SUITE(TestBinaryArithmeticUnsigned, UnsignedIntegerTypes);
TYPED_TEST_SUITE(TestBinaryArithmeticFloating, FloatingTypes);
+TYPED_TEST_SUITE(TestVarArgsArithmeticNumeric, NumericBasedTypes);
+TYPED_TEST_SUITE(TestVarArgsArithmeticFloating, FloatingTypes);
+TYPED_TEST_SUITE(TestVarArgsArithmeticParametricTemporal, ParametricTemporalTypes);
+
TYPED_TEST(TestBinaryArithmeticIntegral, Add) {
for (auto check_overflow : {false, true}) {
this->SetOverflowCheck(check_overflow);
@@ -1161,5 +1254,211 @@ TYPED_TEST(TestUnaryArithmeticFloating, AbsoluteValue) {
}
}
+TYPED_TEST(TestVarArgsArithmeticNumeric, Minimum) {
+ this->AssertNullScalar(Minimum, {});
+ this->AssertNullScalar(Minimum, {this->scalar("null"), this->scalar("null")});
+
+ this->Assert(Minimum, this->scalar("0"), {this->scalar("0")});
+ this->Assert(Minimum, this->scalar("0"),
+ {this->scalar("2"), this->scalar("0"), this->scalar("1")});
+ this->Assert(
+ Minimum, this->scalar("0"),
+ {this->scalar("2"), this->scalar("0"), this->scalar("1"), this->scalar("null")});
+ this->Assert(Minimum, this->scalar("1"),
+ {this->scalar("null"), this->scalar("null"), this->scalar("1"),
+ this->scalar("null")});
+
+ this->Assert(Minimum, (this->array("[]")), {this->array("[]")});
+ this->Assert(Minimum, this->array("[1, 2, 3, null]"), {this->array("[1, 2, 3, null]")});
+
+ this->Assert(Minimum, this->array("[1, 2, 2, 2]"),
+ {this->array("[1, 2, 3, 4]"), this->scalar("2")});
+ this->Assert(Minimum, this->array("[1, 2, 2, 2]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("2")});
+ this->Assert(Minimum, this->array("[1, 2, 2, 2]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
+ this->Assert(Minimum, this->array("[1, 2, 2, 2]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+
+ this->Assert(Minimum, this->array("[1, 2, 2, 2]"),
+ {this->array("[1, 2, 3, 4]"), this->array("[2, 2, 2, 2]")});
+ this->Assert(Minimum, this->array("[1, 2, 2, 2]"),
+ {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
+ this->Assert(Minimum, this->array("[1, 2, 2, 2]"),
+ {this->array("[1, null, 3, 4]"), this->array("[2, 2, 2, 2]")});
+
+ this->Assert(Minimum, this->array("[1, 2, null, 6]"),
+ {this->array("[1, 2, null, null]"), this->array("[4, null, null, 6]")});
+ this->Assert(Minimum, this->array("[1, 2, null, 6]"),
+ {this->array("[4, null, null, 6]"), this->array("[1, 2, null, null]")});
+ this->Assert(Minimum, this->array("[1, 2, 3, 4]"),
+ {this->array("[1, 2, 3, 4]"), this->array("[null, null, null, null]")});
+ this->Assert(Minimum, this->array("[1, 2, 3, 4]"),
+ {this->array("[null, null, null, null]"), this->array("[1, 2, 3, 4]")});
+
+ this->Assert(Minimum, this->array("[1, 1, 1, 1]"),
+ {this->scalar("1"), this->array("[1, 2, 3, 4]")});
+ this->Assert(Minimum, this->array("[1, 1, 1, 1]"),
+ {this->scalar("1"), this->array("[null, null, null, null]")});
+ this->Assert(Minimum, this->array("[1, 1, 1, 1]"),
+ {this->scalar("null"), this->array("[1, 1, 1, 1]")});
+ this->Assert(Minimum, this->array("[null, null, null, null]"),
+ {this->scalar("null"), this->array("[null, null, null, null]")});
+
+ // Test null handling
+ this->min_max_options_.skip_nulls = false;
+ this->AssertNullScalar(Minimum, {this->scalar("null"), this->scalar("null")});
+ this->AssertNullScalar(Minimum, {this->scalar("0"), this->scalar("null")});
+
+ this->Assert(Minimum, this->array("[1, null, 2, 2]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
+ this->Assert(Minimum, this->array("[null, null, null, null]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+ this->Assert(Minimum, this->array("[1, null, 2, 2]"),
+ {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
+
+ this->Assert(Minimum, this->array("[null, null, null, null]"),
+ {this->scalar("1"), this->array("[null, null, null, null]")});
+ this->Assert(Minimum, this->array("[null, null, null, null]"),
+ {this->scalar("null"), this->array("[1, 1, 1, 1]")});
+}
+
+TYPED_TEST(TestVarArgsArithmeticFloating, Minimum) {
+ this->SetNansEqual();
+ this->Assert(Maximum, this->scalar("0"), {this->scalar("0"), this->scalar("NaN")});
+ this->Assert(Maximum, this->scalar("0"), {this->scalar("NaN"), this->scalar("0")});
+ this->Assert(Maximum, this->scalar("Inf"), {this->scalar("Inf"), this->scalar("NaN")});
+ this->Assert(Maximum, this->scalar("Inf"), {this->scalar("NaN"), this->scalar("Inf")});
+ this->Assert(Maximum, this->scalar("-Inf"),
+ {this->scalar("-Inf"), this->scalar("NaN")});
+ this->Assert(Maximum, this->scalar("-Inf"),
+ {this->scalar("NaN"), this->scalar("-Inf")});
+ this->Assert(Maximum, this->scalar("NaN"), {this->scalar("NaN"), this->scalar("null")});
+ this->Assert(Minimum, this->scalar("0"), {this->scalar("0"), this->scalar("Inf")});
+ this->Assert(Minimum, this->scalar("-Inf"), {this->scalar("0"), this->scalar("-Inf")});
+}
+
+TYPED_TEST(TestVarArgsArithmeticParametricTemporal, Minimum) {
+ // Temporal kernel is implemented with numeric kernel underneath
+ this->AssertNullScalar(Minimum, {});
+ this->AssertNullScalar(Minimum, {this->scalar("null"), this->scalar("null")});
+
+ this->Assert(Minimum, this->scalar("0"), {this->scalar("0")});
+ this->Assert(Minimum, this->scalar("0"), {this->scalar("2"), this->scalar("0")});
+ this->Assert(Minimum, this->scalar("0"), {this->scalar("0"), this->scalar("null")});
+
+ this->Assert(Minimum, (this->array("[]")), {this->array("[]")});
+ this->Assert(Minimum, this->array("[1, 2, 3, null]"), {this->array("[1, 2, 3, null]")});
+
+ this->Assert(Minimum, this->array("[1, 2, 2, 2]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+
+ this->Assert(Minimum, this->array("[1, 2, 3, 2]"),
+ {this->array("[1, null, 3, 4]"), this->array("[2, 2, null, 2]")});
+}
+
+TYPED_TEST(TestVarArgsArithmeticNumeric, Maximum) {
+ this->AssertNullScalar(Maximum, {});
+ this->AssertNullScalar(Maximum, {this->scalar("null"), this->scalar("null")});
+
+ this->Assert(Maximum, this->scalar("0"), {this->scalar("0")});
+ this->Assert(Maximum, this->scalar("2"),
+ {this->scalar("2"), this->scalar("0"), this->scalar("1")});
+ this->Assert(
+ Maximum, this->scalar("2"),
+ {this->scalar("2"), this->scalar("0"), this->scalar("1"), this->scalar("null")});
+ this->Assert(Maximum, this->scalar("1"),
+ {this->scalar("null"), this->scalar("null"), this->scalar("1"),
+ this->scalar("null")});
+
+ this->Assert(Maximum, (this->array("[]")), {this->array("[]")});
+ this->Assert(Maximum, this->array("[1, 2, 3, null]"), {this->array("[1, 2, 3, null]")});
+
+ this->Assert(Maximum, this->array("[2, 2, 3, 4]"),
+ {this->array("[1, 2, 3, 4]"), this->scalar("2")});
+ this->Assert(Maximum, this->array("[2, 2, 3, 4]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("2")});
+ this->Assert(Maximum, this->array("[4, 4, 4, 4]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
+ this->Assert(Maximum, this->array("[2, 2, 3, 4]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+
+ this->Assert(Maximum, this->array("[2, 2, 3, 4]"),
+ {this->array("[1, 2, 3, 4]"), this->array("[2, 2, 2, 2]")});
+ this->Assert(Maximum, this->array("[2, 2, 3, 4]"),
+ {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
+ this->Assert(Maximum, this->array("[2, 2, 3, 4]"),
+ {this->array("[1, null, 3, 4]"), this->array("[2, 2, 2, 2]")});
+
+ this->Assert(Maximum, this->array("[4, 2, null, 6]"),
+ {this->array("[1, 2, null, null]"), this->array("[4, null, null, 6]")});
+ this->Assert(Maximum, this->array("[4, 2, null, 6]"),
+ {this->array("[4, null, null, 6]"), this->array("[1, 2, null, null]")});
+ this->Assert(Maximum, this->array("[1, 2, 3, 4]"),
+ {this->array("[1, 2, 3, 4]"), this->array("[null, null, null, null]")});
+ this->Assert(Maximum, this->array("[1, 2, 3, 4]"),
+ {this->array("[null, null, null, null]"), this->array("[1, 2, 3, 4]")});
+
+ this->Assert(Maximum, this->array("[1, 2, 3, 4]"),
+ {this->scalar("1"), this->array("[1, 2, 3, 4]")});
+ this->Assert(Maximum, this->array("[1, 1, 1, 1]"),
+ {this->scalar("1"), this->array("[null, null, null, null]")});
+ this->Assert(Maximum, this->array("[1, 1, 1, 1]"),
+ {this->scalar("null"), this->array("[1, 1, 1, 1]")});
+ this->Assert(Maximum, this->array("[null, null, null, null]"),
+ {this->scalar("null"), this->array("[null, null, null, null]")});
+
+ // Test null handling
+ this->min_max_options_.skip_nulls = false;
+ this->AssertNullScalar(Maximum, {this->scalar("null"), this->scalar("null")});
+ this->AssertNullScalar(Maximum, {this->scalar("0"), this->scalar("null")});
+
+ this->Assert(Maximum, this->array("[4, null, 4, 4]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("2"), this->scalar("4")});
+ this->Assert(Maximum, this->array("[null, null, null, null]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+ this->Assert(Maximum, this->array("[2, null, 3, 4]"),
+ {this->array("[1, 2, 3, 4]"), this->array("[2, null, 2, 2]")});
+
+ this->Assert(Maximum, this->array("[null, null, null, null]"),
+ {this->scalar("1"), this->array("[null, null, null, null]")});
+ this->Assert(Maximum, this->array("[null, null, null, null]"),
+ {this->scalar("null"), this->array("[1, 1, 1, 1]")});
+}
+
+TYPED_TEST(TestVarArgsArithmeticFloating, Maximum) {
+ this->SetNansEqual();
+ this->Assert(Maximum, this->scalar("0"), {this->scalar("0"), this->scalar("NaN")});
+ this->Assert(Maximum, this->scalar("0"), {this->scalar("NaN"), this->scalar("0")});
+ this->Assert(Maximum, this->scalar("Inf"), {this->scalar("Inf"), this->scalar("NaN")});
+ this->Assert(Maximum, this->scalar("Inf"), {this->scalar("NaN"), this->scalar("Inf")});
+ this->Assert(Maximum, this->scalar("-Inf"),
+ {this->scalar("-Inf"), this->scalar("NaN")});
+ this->Assert(Maximum, this->scalar("-Inf"),
+ {this->scalar("NaN"), this->scalar("-Inf")});
+ this->Assert(Maximum, this->scalar("NaN"), {this->scalar("NaN"), this->scalar("null")});
+ this->Assert(Maximum, this->scalar("Inf"), {this->scalar("0"), this->scalar("Inf")});
+ this->Assert(Maximum, this->scalar("0"), {this->scalar("0"), this->scalar("-Inf")});
+}
+
+TYPED_TEST(TestVarArgsArithmeticParametricTemporal, Maximum) {
+ // Temporal kernel is implemented with numeric kernel underneath
+ this->AssertNullScalar(Maximum, {});
+ this->AssertNullScalar(Maximum, {this->scalar("null"), this->scalar("null")});
+
+ this->Assert(Maximum, this->scalar("0"), {this->scalar("0")});
+ this->Assert(Maximum, this->scalar("2"), {this->scalar("2"), this->scalar("0")});
+ this->Assert(Maximum, this->scalar("0"), {this->scalar("0"), this->scalar("null")});
+
+ this->Assert(Maximum, (this->array("[]")), {this->array("[]")});
+ this->Assert(Maximum, this->array("[1, 2, 3, null]"), {this->array("[1, 2, 3, null]")});
+
+ this->Assert(Maximum, this->array("[2, 2, 3, 4]"),
+ {this->array("[1, null, 3, 4]"), this->scalar("null"), this->scalar("2")});
+
+ this->Assert(Maximum, this->array("[2, 2, 3, 4]"),
+ {this->array("[1, null, 3, 4]"), this->array("[2, 2, null, 2]")});
+}
+
} // namespace compute
} // namespace arrow
diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst
index 4e729b055cf..fff47fd70ae 100644
--- a/docs/source/cpp/compute.rst
+++ b/docs/source/cpp/compute.rst
@@ -310,6 +310,20 @@ output element is null.
| less, less_equal | | | |
+--------------------------+------------+---------------------------------------------+---------------------+
+These functions take any number of inputs of numeric type (in which case they
+will be cast to the :ref:`common numeric type ` before
+comparison) or of temporal types. If any input is dictionary encoded it will be
+expanded for the purposes of comparison.
+
++--------------------------+------------+---------------------------------------------+---------------------+-------+
+| Function names | Arity | Input types | Output type | Notes |
++==========================+============+=============================================+=====================+=======+
+| maximum, minimum | Varargs | Numeric and Temporal | Numeric or Temporal | \(1) |
++--------------------------+------------+---------------------------------------------+---------------------+-------+
+
+* \(1) By default, nulls are skipped (but the kernel can be configured to propagate nulls).
+ For floating point values, NaN will be taken over null but not over any other value.
+
Logical functions
~~~~~~~~~~~~~~~~~~
diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst
index 3010776930f..d97a15e18ba 100644
--- a/docs/source/python/api/compute.rst
+++ b/docs/source/python/api/compute.rst
@@ -75,6 +75,14 @@ they return ``null``.
less_equal
not_equal
+These functions take any number of arguments of a numeric or temporal type.
+
+.. autosummary::
+ :toctree: ../generated/
+
+ maximum
+ minimum
+
Logical Functions
-----------------
From 3bed6350d3a7d81d9f60815bc1fa9cb497899caa Mon Sep 17 00:00:00 2001
From: David Li
Date: Mon, 24 May 2021 18:38:43 -0400
Subject: [PATCH 02/14] ARROW-12751: [C++] Fix build issues
---
cpp/src/arrow/compute/api_scalar.h | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index e389e6d344e..14767918d2e 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -258,20 +258,24 @@ Result Power(const Datum& left, const Datum& right,
ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);
-/// \brief
+/// \brief Find the element-wise maximum of any number of arrays or scalars.
+/// Array values must be the same length.
///
-/// \param[in] args
+/// \param[in] args arrays or scalars to operate on.
+/// \param[in] options options for handling nulls, optional
/// \param[in] ctx the function execution context, optional
-/// \return
+/// \return the element-wise maximum
ARROW_EXPORT
Result Maximum(const std::vector& args, MinMaxOptions options = {},
ExecContext* ctx = NULLPTR);
-/// \brief
+/// \brief Find the element-wise minimum of any number of arrays or scalars.
+/// Array values must be the same length.
///
-/// \param[in] args
+/// \param[in] args arrays or scalars to operate on.
+/// \param[in] options options for handling nulls, optional
/// \param[in] ctx the function execution context, optional
-/// \return
+/// \return the element-wise minimum
ARROW_EXPORT
Result Minimum(const std::vector& args, MinMaxOptions options = {},
ExecContext* ctx = NULLPTR);
From b9a19f3e6a8a6b8934e23cbef79708f09ffa1f18 Mon Sep 17 00:00:00 2001
From: David Li
Date: Mon, 24 May 2021 19:07:48 -0400
Subject: [PATCH 03/14] ARROW-12751: [C++] Fix Clang warnings
---
cpp/src/arrow/compute/kernels/scalar_arithmetic.cc | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 2501517fb80..e7c15f17d7d 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -613,7 +613,7 @@ struct ScalarMinMax {
// All arguments are scalar
OutValue value{};
bool valid = false;
- for (const auto arg : batch.values) {
+ for (const auto& arg : batch.values) {
const auto& scalar = *arg.scalar();
if (!scalar.is_valid) {
if (options.skip_nulls) continue;
@@ -654,7 +654,7 @@ struct ScalarMinMax {
// At least one array, two or more arguments
int64_t length = 0;
- for (const auto arg : batch.values) {
+ for (const auto& arg : batch.values) {
if (arg.is_array()) {
length = arg.array()->length;
break;
@@ -664,7 +664,7 @@ struct ScalarMinMax {
if (!options.skip_nulls) {
// We can precompute the validity buffer in this case
// If output will be all null, just return
- for (auto arg : batch.values) {
+ for (const auto& arg : batch.values) {
if (arg.is_scalar() && !arg.scalar()->is_valid) {
ARROW_ASSIGN_OR_RAISE(
auto array, MakeArrayFromScalar(*arg.scalar(), length, ctx->memory_pool()));
@@ -678,7 +678,7 @@ struct ScalarMinMax {
// AND together the validity buffers of all arrays
ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(length));
bool first = true;
- for (auto arg : batch.values) {
+ for (const auto& arg : batch.values) {
if (!arg.is_array()) continue;
auto arr = arg.array();
if (!arr->buffers[0]) continue;
From 60ab9d061b16a1b0fb8d375105c74c5c9aecc9f5 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 25 May 2021 09:18:42 -0400
Subject: [PATCH 04/14] ARROW-12751: [C++] Fix name resolution issue
---
.../arrow/compute/kernels/scalar_boolean.cc | 56 +++++++++----------
1 file changed, 28 insertions(+), 28 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean.cc b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
index 3d47d239888..89107120fa3 100644
--- a/cpp/src/arrow/compute/kernels/scalar_boolean.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_boolean.cc
@@ -95,7 +95,7 @@ inline Bitmap GetBitmap(const ArrayData& arr, int index) {
return Bitmap{arr.buffers[index], arr.offset, arr.length};
}
-struct Invert {
+struct InvertOp {
static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
*checked_cast(out) = InvertScalar(in);
return Status::OK();
@@ -115,8 +115,8 @@ struct Commutative {
}
};
-struct And : Commutative {
- using Commutative::Call;
+struct AndOp : Commutative {
+ using Commutative::Call;
static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
Scalar* out) {
@@ -147,8 +147,8 @@ struct And : Commutative {
}
};
-struct KleeneAnd : Commutative {
- using Commutative::Call;
+struct KleeneAndOp : Commutative {
+ using Commutative::Call;
static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
Scalar* out) {
@@ -205,7 +205,7 @@ struct KleeneAnd : Commutative {
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
out->null_count = 0;
out->buffers[0] = nullptr;
- return And::Call(ctx, left, right, out);
+ return AndOp::Call(ctx, left, right, out);
}
auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true,
uint64_t right_false, uint64_t* out_valid,
@@ -218,8 +218,8 @@ struct KleeneAnd : Commutative {
}
};
-struct Or : Commutative {
- using Commutative::Call;
+struct OrOp : Commutative {
+ using Commutative::Call;
static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
Scalar* out) {
@@ -250,8 +250,8 @@ struct Or : Commutative {
}
};
-struct KleeneOr : Commutative {
- using Commutative::Call;
+struct KleeneOrOp : Commutative {
+ using Commutative::Call;
static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
Scalar* out) {
@@ -308,7 +308,7 @@ struct KleeneOr : Commutative {
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
out->null_count = 0;
out->buffers[0] = nullptr;
- return Or::Call(ctx, left, right, out);
+ return OrOp::Call(ctx, left, right, out);
}
static auto compute_word = [](uint64_t left_true, uint64_t left_false,
@@ -323,8 +323,8 @@ struct KleeneOr : Commutative {
}
};
-struct Xor : Commutative {
- using Commutative::Call;
+struct XorOp : Commutative {
+ using Commutative::Call;
static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
Scalar* out) {
@@ -355,10 +355,10 @@ struct Xor : Commutative {
}
};
-struct AndNot {
+struct AndNotOp {
static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
Scalar* out) {
- return And::Call(ctx, left, InvertScalar(right), out);
+ return AndOp::Call(ctx, left, InvertScalar(right), out);
}
static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
@@ -373,7 +373,7 @@ struct AndNot {
static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
ArrayData* out) {
- return And::Call(ctx, left, InvertScalar(right), out);
+ return AndOp::Call(ctx, left, InvertScalar(right), out);
}
static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
@@ -385,10 +385,10 @@ struct AndNot {
}
};
-struct KleeneAndNot {
+struct KleeneAndNotOp {
static Status Call(KernelContext* ctx, const Scalar& left, const Scalar& right,
Scalar* out) {
- return KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+ return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
}
static Status Call(KernelContext* ctx, const Scalar& left, const ArrayData& right,
@@ -430,7 +430,7 @@ struct KleeneAndNot {
static Status Call(KernelContext* ctx, const ArrayData& left, const Scalar& right,
ArrayData* out) {
- return KleeneAnd::Call(ctx, left, InvertScalar(right), out);
+ return KleeneAndOp::Call(ctx, left, InvertScalar(right), out);
}
static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
@@ -438,7 +438,7 @@ struct KleeneAndNot {
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
out->null_count = 0;
out->buffers[0] = nullptr;
- return AndNot::Call(ctx, left, right, out);
+ return AndNotOp::Call(ctx, left, right, out);
}
static auto compute_word = [](uint64_t left_true, uint64_t left_false,
@@ -543,20 +543,20 @@ namespace internal {
void RegisterScalarBoolean(FunctionRegistry* registry) {
// These functions can write into sliced output bitmaps
- MakeFunction("invert", 1, applicator::SimpleUnary, &invert_doc, registry);
- MakeFunction("and", 2, applicator::SimpleBinary, &and_doc, registry);
- MakeFunction("and_not", 2, applicator::SimpleBinary, &and_not_doc, registry);
- MakeFunction("or", 2, applicator::SimpleBinary, &or_doc, registry);
- MakeFunction("xor", 2, applicator::SimpleBinary, &xor_doc, registry);
+ MakeFunction("invert", 1, applicator::SimpleUnary, &invert_doc, registry);
+ MakeFunction("and", 2, applicator::SimpleBinary, &and_doc, registry);
+ MakeFunction("and_not", 2, applicator::SimpleBinary, &and_not_doc, registry);
+ MakeFunction("or", 2, applicator::SimpleBinary, &or_doc, registry);
+ MakeFunction("xor", 2, applicator::SimpleBinary, &xor_doc, registry);
// The Kleene logic kernels cannot write into sliced output bitmaps
- MakeFunction("and_kleene", 2, applicator::SimpleBinary, &and_kleene_doc,
+ MakeFunction("and_kleene", 2, applicator::SimpleBinary, &and_kleene_doc,
registry,
/*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE);
- MakeFunction("and_not_kleene", 2, applicator::SimpleBinary,
+ MakeFunction("and_not_kleene", 2, applicator::SimpleBinary,
&and_not_kleene_doc, registry,
/*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE);
- MakeFunction("or_kleene", 2, applicator::SimpleBinary, &or_kleene_doc,
+ MakeFunction("or_kleene", 2, applicator::SimpleBinary, &or_kleene_doc,
registry,
/*can_write_into_slices=*/false, NullHandling::COMPUTED_PREALLOCATE);
}
From 000620712ede15871936bf6409d7a4a3faa1be32 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 25 May 2021 11:53:01 -0400
Subject: [PATCH 05/14] ARROW-12751: [C++] Update options class name
---
cpp/src/arrow/compute/api_scalar.cc | 4 ++--
cpp/src/arrow/compute/api_scalar.h | 10 ++++++----
cpp/src/arrow/compute/kernels/scalar_arithmetic.cc | 12 ++++++------
.../arrow/compute/kernels/scalar_arithmetic_test.cc | 6 +++---
4 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/cpp/src/arrow/compute/api_scalar.cc b/cpp/src/arrow/compute/api_scalar.cc
index f2fdeb5eead..57f325ae264 100644
--- a/cpp/src/arrow/compute/api_scalar.cc
+++ b/cpp/src/arrow/compute/api_scalar.cc
@@ -63,12 +63,12 @@ SCALAR_ARITHMETIC_BINARY(Multiply, "multiply", "multiply_checked")
SCALAR_ARITHMETIC_BINARY(Divide, "divide", "divide_checked")
SCALAR_ARITHMETIC_BINARY(Power, "power", "power_checked")
-Result Maximum(const std::vector& args, MinMaxOptions options,
+Result Maximum(const std::vector& args, ElementWiseAggregateOptions options,
ExecContext* ctx) {
return CallFunction("maximum", args, &options, ctx);
}
-Result Minimum(const std::vector& args, MinMaxOptions options,
+Result Minimum(const std::vector& args, ElementWiseAggregateOptions options,
ExecContext* ctx) {
return CallFunction("minimum", args, &options, ctx);
}
diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h
index 14767918d2e..2f0710d5c87 100644
--- a/cpp/src/arrow/compute/api_scalar.h
+++ b/cpp/src/arrow/compute/api_scalar.h
@@ -42,8 +42,8 @@ struct ArithmeticOptions : public FunctionOptions {
bool check_overflow;
};
-struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
- MinMaxOptions() : skip_nulls(true) {}
+struct ARROW_EXPORT ElementWiseAggregateOptions : public FunctionOptions {
+ ElementWiseAggregateOptions() : skip_nulls(true) {}
bool skip_nulls;
};
@@ -266,7 +266,8 @@ Result Power(const Datum& left, const Datum& right,
/// \param[in] ctx the function execution context, optional
/// \return the element-wise maximum
ARROW_EXPORT
-Result Maximum(const std::vector& args, MinMaxOptions options = {},
+Result Maximum(const std::vector& args,
+ ElementWiseAggregateOptions options = {},
ExecContext* ctx = NULLPTR);
/// \brief Find the element-wise minimum of any number of arrays or scalars.
@@ -277,7 +278,8 @@ Result Maximum(const std::vector& args, MinMaxOptions options = {}
/// \param[in] ctx the function execution context, optional
/// \return the element-wise minimum
ARROW_EXPORT
-Result Minimum(const std::vector& args, MinMaxOptions options = {},
+Result Minimum(const std::vector& args,
+ ElementWiseAggregateOptions options = {},
ExecContext* ctx = NULLPTR);
/// \brief Compare a numeric array with a scalar.
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index e7c15f17d7d..d63dcc06ed2 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -601,15 +601,15 @@ std::shared_ptr MakeUnarySignedArithmeticFunctionNotNull(
return func;
}
-using MinMaxState = OptionsWrapper;
+using MinMaxState = OptionsWrapper;
// Implement a variadic scalar min/max kernel.
template
struct ScalarMinMax {
using OutValue = typename GetOutputType::T;
- static void ExecScalar(const ExecBatch& batch, const MinMaxOptions& options,
- Datum* out) {
+ static void ExecScalar(const ExecBatch& batch,
+ const ElementWiseAggregateOptions& options, Datum* out) {
// All arguments are scalar
OutValue value{};
bool valid = false;
@@ -634,7 +634,7 @@ struct ScalarMinMax {
}
static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
- const MinMaxOptions& options = MinMaxState::Get(ctx);
+ const ElementWiseAggregateOptions& options = MinMaxState::Get(ctx);
const auto descrs = batch.GetDescriptors();
const bool all_scalar =
std::all_of(batch.values.begin(), batch.values.end(),
@@ -890,14 +890,14 @@ const FunctionDoc minimum_doc{
("Nulls will be ignored (default) or propagated. "
"NaN will be taken over null, but not over any valid float."),
{"*args"},
- "MinMaxOptions"};
+ "ElementWiseAggregateOptions"};
const FunctionDoc maximum_doc{
"Find the element-wise maximum value",
("Nulls will be ignored (default) or propagated. "
"NaN will be taken over null, but not over any valid float."),
{"*args"},
- "MinMaxOptions"};
+ "ElementWiseAggregateOptions"};
} // namespace
void RegisterScalarArithmetic(FunctionRegistry* registry) {
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 4ce85b93db6..0fe3ac89ce0 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -320,8 +320,8 @@ class TestVarArgsArithmetic : public TestBase {
return TypeTraits::type_singleton();
}
- using VarArgsFunction = std::function(const std::vector&,
- MinMaxOptions, ExecContext*)>;
+ using VarArgsFunction = std::function(
+ const std::vector&, ElementWiseAggregateOptions, ExecContext*)>;
Datum scalar(const std::string& value) {
return ScalarFromJSON(type_singleton(), value);
@@ -367,7 +367,7 @@ class TestVarArgsArithmetic : public TestBase {
}
EqualOptions equal_options_ = EqualOptions::Defaults();
- MinMaxOptions min_max_options_;
+ ElementWiseAggregateOptions min_max_options_;
};
template
From 72ce13d2ce76b945bab1e0d01bc34ef235a584c8 Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 25 May 2021 14:20:36 -0400
Subject: [PATCH 06/14] ARROW-12751: [C++] Reorganize kernel implementation
---
.../arrow/compute/kernels/codegen_internal.h | 43 ++-
.../compute/kernels/scalar_arithmetic.cc | 250 ++++++++----------
2 files changed, 146 insertions(+), 147 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h
index e31771a89ca..6d5c837f514 100644
--- a/cpp/src/arrow/compute/kernels/codegen_internal.h
+++ b/cpp/src/arrow/compute/kernels/codegen_internal.h
@@ -303,8 +303,12 @@ struct BoxScalar;
template
struct BoxScalar> {
using T = typename GetOutputType::T;
- using ScalarType = typename TypeTraits::ScalarType;
- static void Box(T val, Scalar* out) { checked_cast(out)->value = val; }
+ static void Box(T val, Scalar* out) {
+ // Enables BoxScalar to work on a (for example) Time64Scalar
+ T* mutable_data = reinterpret_cast(
+ checked_cast<::arrow::internal::PrimitiveScalarBase*>(out)->mutable_data());
+ *mutable_data = val;
+ }
};
template
@@ -1093,6 +1097,41 @@ ArrayKernelExec GeneratePhysicalInteger(detail::GetTypeId get_id) {
}
}
+template class Generator, typename... Args>
+ArrayKernelExec GeneratePhysicalNumeric(detail::GetTypeId get_id) {
+ switch (get_id.id) {
+ case Type::INT8:
+ return Generator::Exec;
+ case Type::INT16:
+ return Generator::Exec;
+ case Type::INT32:
+ case Type::DATE32:
+ case Type::TIME32:
+ return Generator::Exec;
+ case Type::INT64:
+ case Type::DATE64:
+ case Type::TIMESTAMP:
+ case Type::TIME64:
+ case Type::DURATION:
+ return Generator::Exec;
+ case Type::UINT8:
+ return Generator::Exec;
+ case Type::UINT16:
+ return Generator::Exec;
+ case Type::UINT32:
+ return Generator::Exec;
+ case Type::UINT64:
+ return Generator::Exec;
+ case Type::FLOAT:
+ return Generator::Exec;
+ case Type::DOUBLE:
+ return Generator::Exec;
+ default:
+ DCHECK(false);
+ return ExecFail;
+ }
+}
+
// Generate a kernel given a templated functor for integer types
//
// See "Numeric" above for description of the generator functor
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index d63dcc06ed2..1ebe2824006 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -454,45 +454,6 @@ ArrayKernelExec ArithmeticExecFromOp(detail::GetTypeId get_id) {
}
}
-template class KernelGenerator, typename Op>
-ArrayKernelExec MinMaxExecFromOp(detail::GetTypeId get_id) {
- switch (get_id.id) {
- case Type::INT8:
- return KernelGenerator::Exec;
- case Type::UINT8:
- return KernelGenerator::Exec;
- case Type::INT16:
- return KernelGenerator::Exec;
- case Type::UINT16:
- return KernelGenerator::Exec;
- case Type::INT32:
- return KernelGenerator::Exec;
- case Type::DATE32:
- return KernelGenerator::Exec;
- case Type::TIME32:
- return KernelGenerator::Exec;
- case Type::UINT32:
- return KernelGenerator::Exec;
- case Type::INT64:
- return KernelGenerator::Exec;
- case Type::TIMESTAMP:
- return KernelGenerator::Exec;
- case Type::DATE64:
- return KernelGenerator::Exec;
- case Type::TIME64:
- return KernelGenerator::Exec;
- case Type::UINT64:
- return KernelGenerator::Exec;
- case Type::FLOAT:
- return KernelGenerator::Exec;
- case Type::DOUBLE:
- return KernelGenerator::Exec;
- default:
- DCHECK(false);
- return ExecFail;
- }
-}
-
struct ArithmeticFunction : ScalarFunction {
using ScalarFunction::ScalarFunction;
@@ -604,20 +565,22 @@ std::shared_ptr MakeUnarySignedArithmeticFunctionNotNull(
using MinMaxState = OptionsWrapper;
// Implement a variadic scalar min/max kernel.
-template
+template
struct ScalarMinMax {
using OutValue = typename GetOutputType::T;
static void ExecScalar(const ExecBatch& batch,
- const ElementWiseAggregateOptions& options, Datum* out) {
+ const ElementWiseAggregateOptions& options, Scalar* out) {
// All arguments are scalar
OutValue value{};
bool valid = false;
for (const auto& arg : batch.values) {
+ // Ignore non-scalar arguments so we can use it in the mixed-scalar-and-array case
+ if (!arg.is_scalar()) continue;
const auto& scalar = *arg.scalar();
if (!scalar.is_valid) {
if (options.skip_nulls) continue;
- out->scalar()->is_valid = false;
+ out->is_valid = false;
return;
}
if (!valid) {
@@ -627,20 +590,29 @@ struct ScalarMinMax {
value = Op::Call(value, UnboxScalar::Unbox(scalar));
}
}
- out->scalar()->is_valid = valid;
+ out->is_valid = valid;
if (valid) {
- BoxScalar::Box(value, out->scalar().get());
+ BoxScalar::Box(value, out);
}
}
static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const ElementWiseAggregateOptions& options = MinMaxState::Get(ctx);
const auto descrs = batch.GetDescriptors();
- const bool all_scalar =
- std::all_of(batch.values.begin(), batch.values.end(),
- [](const Datum& d) { return d.descr().shape == ValueDescr::SCALAR; });
+ bool all_scalar = true;
+ bool any_scalar = false;
+ size_t first_array_index = batch.values.size();
+ for (size_t i = 0; i < batch.values.size(); i++) {
+ const auto& datum = batch.values[i];
+ all_scalar &= datum.descr().shape == ValueDescr::SCALAR;
+ any_scalar |= datum.descr().shape == ValueDescr::SCALAR;
+ if (first_array_index >= batch.values.size() &&
+ datum.descr().shape == ValueDescr::ARRAY) {
+ first_array_index = i;
+ }
+ }
if (all_scalar) {
- ExecScalar(batch, options, out);
+ ExecScalar(batch, options, out->scalar().get());
return Status::OK();
}
@@ -653,120 +625,108 @@ struct ScalarMinMax {
}
// At least one array, two or more arguments
- int64_t length = 0;
- for (const auto& arg : batch.values) {
- if (arg.is_array()) {
- length = arg.array()->length;
- break;
+ DCHECK_GE(first_array_index, 0);
+ DCHECK_LT(first_array_index, batch.values.size());
+ DCHECK(batch.values[first_array_index].is_array());
+ if (any_scalar) {
+ ARROW_ASSIGN_OR_RAISE(std::shared_ptr temp_scalar,
+ MakeScalar(out->type(), 0));
+ ExecScalar(batch, options, temp_scalar.get());
+ if (options.skip_nulls || temp_scalar->is_valid) {
+ // Promote to output array
+ ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*temp_scalar, batch.length,
+ ctx->memory_pool()));
+ *output = *array->data();
+ if (!temp_scalar->is_valid) {
+ // MakeArrayFromScalar reuses the same buffer for null/data in
+ // this case, allocate a real one since we'll be writing to it
+ ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
+ ::arrow::internal::BitmapXor(output->buffers[0]->data(), /*left_offset=*/0,
+ output->buffers[0]->data(), /*right_offset=*/0,
+ batch.length, /*out_offset=*/0,
+ output->buffers[0]->mutable_data());
+ }
+ } else {
+ // Abort early
+ ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*temp_scalar, batch.length,
+ ctx->memory_pool()));
+ *output = *array->data();
+ return Status::OK();
+ }
+ } else {
+ // Copy first array argument to output array
+ const ArrayData& input = *batch.values[first_array_index].array();
+ ARROW_ASSIGN_OR_RAISE(output->buffers[1],
+ ctx->Allocate(batch.length * sizeof(OutValue)));
+ if (options.skip_nulls && input.buffers[0]) {
+ // Don't copy the bitmap if !options.skip_nulls since we'll precompute it later
+ ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
+ ::arrow::internal::CopyBitmap(input.buffers[0]->data(), input.offset,
+ batch.length, output->buffers[0]->mutable_data(),
+ /*dest_offset=*/0);
}
+ // This won't work for nested or variable-sized types
+ std::memcpy(output->buffers[1]->mutable_data(),
+ input.buffers[1]->data() + (input.offset * sizeof(OutValue)),
+ batch.length * sizeof(OutValue));
}
if (!options.skip_nulls) {
// We can precompute the validity buffer in this case
- // If output will be all null, just return
- for (const auto& arg : batch.values) {
- if (arg.is_scalar() && !arg.scalar()->is_valid) {
- ARROW_ASSIGN_OR_RAISE(
- auto array, MakeArrayFromScalar(*arg.scalar(), length, ctx->memory_pool()));
- *output = *array->data();
- return Status::OK();
- } else if (arg.is_array() && arg.array()->null_count == length) {
- *output = *arg.array();
- return Status::OK();
- }
- }
// AND together the validity buffers of all arrays
- ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(length));
+ ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
bool first = true;
for (const auto& arg : batch.values) {
if (!arg.is_array()) continue;
auto arr = arg.array();
if (!arr->buffers[0]) continue;
if (first) {
- ::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset, length,
- output->buffers[0]->mutable_data(),
+ ::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset,
+ batch.length, output->buffers[0]->mutable_data(),
/*dest_offset=*/0);
first = false;
} else {
- ::arrow::internal::BitmapAnd(
- output->buffers[0]->data(), /*left_offset=*/0, arr->buffers[0]->data(),
- arr->offset, length, /*out_offset=*/0, output->buffers[0]->mutable_data());
+ ::arrow::internal::BitmapAnd(output->buffers[0]->data(), /*left_offset=*/0,
+ arr->buffers[0]->data(), arr->offset, batch.length,
+ /*out_offset=*/0,
+ output->buffers[0]->mutable_data());
}
}
}
-
- if (batch.values[0].is_scalar()) {
- // Promote to output array
- ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*batch.values[0].scalar(),
- length, ctx->memory_pool()));
- *output = *array->data();
- if (!batch.values[0].scalar()->is_valid) {
- // MakeArrayFromScalar reuses the same buffer for null/data in
- // this case, allocate a real one since we'll be writing to it
- ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(length));
- ::arrow::internal::BitmapXor(output->buffers[0]->data(), /*left_offset=*/0,
- output->buffers[0]->data(), /*right_offset=*/0,
- length, /*out_offset=*/0,
- output->buffers[0]->mutable_data());
- }
- } else {
- // Copy to output array
- const ArrayData& input = *batch.values[0].array();
- ARROW_ASSIGN_OR_RAISE(output->buffers[1], ctx->Allocate(length * sizeof(OutValue)));
- if (options.skip_nulls && input.buffers[0]) {
- ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(length));
- ::arrow::internal::CopyBitmap(input.buffers[0]->data(), input.offset, length,
- output->buffers[0]->mutable_data(),
- /*dest_offset=*/0);
- }
- // This won't work for nested or variable-sized types
- std::memcpy(output->buffers[1]->mutable_data(),
- input.buffers[1]->data() + (input.offset * sizeof(OutValue)),
- length * sizeof(OutValue));
- }
-
- for (size_t i = 1; i < batch.values.size(); i++) {
+ const size_t start_at = any_scalar ? first_array_index : (first_array_index + 1);
+ for (size_t i = start_at; i < batch.values.size(); i++) {
+ if (!batch.values[i].is_array()) continue;
OutputArrayWriter writer(out->mutable_array());
- if (batch.values[i].is_scalar()) {
- auto scalar = batch.values[i].scalar();
- if (!scalar->is_valid) continue;
- auto value = UnboxScalar::Unbox(*scalar);
- VisitArrayValuesInline(
- *out->array(), [&](OutValue u) { writer.Write(Op::Call(u, value)); },
- [&]() { writer.Write(value); });
- if (options.skip_nulls && output->buffers[0]) output->buffers[0] = nullptr;
- } else {
- const ArrayData& arr = *batch.values[i].array();
- ArrayIterator out_it(*output);
- int64_t index = 0;
- VisitArrayValuesInline(
- arr,
- [&](OutValue value) {
- auto u = out_it();
- if (!output->buffers[0] ||
- BitUtil::GetBit(output->buffers[0]->data(), index)) {
- writer.Write(Op::Call(u, value));
- } else {
- writer.Write(value);
- }
- index++;
- },
- [&]() {
- // RHS is null, preserve the LHS
- writer.values++;
- index++;
- out_it();
- });
- // When not skipping nulls, we pre-compute the validity buffer
- if (options.skip_nulls && output->buffers[0]) {
- if (arr.buffers[0]) {
- ::arrow::internal::BitmapOr(
- output->buffers[0]->data(), /*left_offset=*/0, arr.buffers[0]->data(),
- /*right_offset=*/arr.offset, length, /*out_offset=*/0,
- output->buffers[0]->mutable_data());
- } else {
- output->buffers[0] = nullptr;
- }
+ const ArrayData& arr = *batch.values[i].array();
+ ArrayIterator out_it(*output);
+ int64_t index = 0;
+ VisitArrayValuesInline(
+ arr,
+ [&](OutValue value) {
+ auto u = out_it();
+ if (!output->buffers[0] ||
+ BitUtil::GetBit(output->buffers[0]->data(), index)) {
+ writer.Write(Op::Call(u, value));
+ } else {
+ writer.Write(value);
+ }
+ index++;
+ },
+ [&]() {
+ // RHS is null, preserve the LHS
+ writer.values++;
+ index++;
+ out_it();
+ });
+ // When not skipping nulls, we pre-compute the validity buffer
+ if (options.skip_nulls && output->buffers[0]) {
+ if (arr.buffers[0]) {
+ ::arrow::internal::BitmapOr(
+ output->buffers[0]->data(), /*left_offset=*/0, arr.buffers[0]->data(),
+ /*right_offset=*/arr.offset, batch.length, /*out_offset=*/0,
+ output->buffers[0]->mutable_data());
+ } else {
+ output->buffers[0] = nullptr;
}
}
}
@@ -780,7 +740,7 @@ std::shared_ptr MakeScalarMinMax(std::string name,
const FunctionDoc* doc) {
auto func = std::make_shared(name, Arity::VarArgs(), doc);
for (const auto& ty : NumericTypes()) {
- auto exec = MinMaxExecFromOp(ty);
+ auto exec = GeneratePhysicalNumeric(ty);
ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
MinMaxState::Init};
kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
@@ -788,7 +748,7 @@ std::shared_ptr MakeScalarMinMax(std::string name,
DCHECK_OK(func->AddKernel(std::move(kernel)));
}
for (const auto& ty : TemporalTypes()) {
- auto exec = MinMaxExecFromOp(ty);
+ auto exec = GeneratePhysicalNumeric(ty);
ScalarKernel kernel{KernelSignature::Make({ty}, ty, /*is_varargs=*/true), exec,
MinMaxState::Init};
kernel.null_handling = NullHandling::type::COMPUTED_NO_PREALLOCATE;
From a0bbdf1108caf7e04de36a055363f60b4aca17ab Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 25 May 2021 16:53:48 -0400
Subject: [PATCH 07/14] ARROW-12751: [C++] Simplify kernel implementation
---
.../compute/kernels/scalar_arithmetic.cc | 88 ++++++++-----------
.../compute/kernels/scalar_arithmetic_test.cc | 9 +-
2 files changed, 43 insertions(+), 54 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 1ebe2824006..14811e68c2f 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -599,65 +599,54 @@ struct ScalarMinMax {
static Status Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
const ElementWiseAggregateOptions& options = MinMaxState::Get(ctx);
const auto descrs = batch.GetDescriptors();
- bool all_scalar = true;
- bool any_scalar = false;
- size_t first_array_index = batch.values.size();
- for (size_t i = 0; i < batch.values.size(); i++) {
- const auto& datum = batch.values[i];
- all_scalar &= datum.descr().shape == ValueDescr::SCALAR;
- any_scalar |= datum.descr().shape == ValueDescr::SCALAR;
- if (first_array_index >= batch.values.size() &&
- datum.descr().shape == ValueDescr::ARRAY) {
- first_array_index = i;
- }
- }
- if (all_scalar) {
+ const size_t scalar_count =
+ static_cast(std::count_if(batch.values.begin(), batch.values.end(),
+ [](const Datum& d) { return d.is_scalar(); }));
+ if (scalar_count == batch.values.size()) {
ExecScalar(batch, options, out->scalar().get());
return Status::OK();
}
ArrayData* output = out->mutable_array();
- // Exactly one array (output = input)
- if (batch.values.size() == 1) {
- *output = *batch[0].array();
- return Status::OK();
+ // At least one array, two or more arguments
+ ArrayDataVector arrays;
+ for (const auto& arg : batch.values) {
+ if (!arg.is_array()) continue;
+ arrays.push_back(arg.array());
}
- // At least one array, two or more arguments
- DCHECK_GE(first_array_index, 0);
- DCHECK_LT(first_array_index, batch.values.size());
- DCHECK(batch.values[first_array_index].is_array());
- if (any_scalar) {
+ if (scalar_count > 0) {
ARROW_ASSIGN_OR_RAISE(std::shared_ptr temp_scalar,
MakeScalar(out->type(), 0));
ExecScalar(batch, options, temp_scalar.get());
- if (options.skip_nulls || temp_scalar->is_valid) {
+ if (temp_scalar->is_valid) {
// Promote to output array
ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*temp_scalar, batch.length,
ctx->memory_pool()));
- *output = *array->data();
- if (!temp_scalar->is_valid) {
- // MakeArrayFromScalar reuses the same buffer for null/data in
- // this case, allocate a real one since we'll be writing to it
- ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
- ::arrow::internal::BitmapXor(output->buffers[0]->data(), /*left_offset=*/0,
- output->buffers[0]->data(), /*right_offset=*/0,
- batch.length, /*out_offset=*/0,
- output->buffers[0]->mutable_data());
- }
- } else {
+ arrays.push_back(array->data());
+ } else if (!options.skip_nulls) {
// Abort early
ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*temp_scalar, batch.length,
ctx->memory_pool()));
*output = *array->data();
return Status::OK();
}
+ }
+
+ // Exactly one array to consider (output = input)
+ if (arrays.size() == 1) {
+ *output = *arrays[0];
+ return Status::OK();
+ }
+
+ // Two or more arrays to consider
+ if (scalar_count > 0) {
+ // We allocated the last array from a scalar: recycle it as the output
+ *output = *arrays.back();
} else {
- // Copy first array argument to output array
- const ArrayData& input = *batch.values[first_array_index].array();
- ARROW_ASSIGN_OR_RAISE(output->buffers[1],
- ctx->Allocate(batch.length * sizeof(OutValue)));
+ // Copy last array argument to output array
+ const ArrayData& input = *arrays.back();
if (options.skip_nulls && input.buffers[0]) {
// Don't copy the bitmap if !options.skip_nulls since we'll precompute it later
ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
@@ -666,6 +655,8 @@ struct ScalarMinMax {
/*dest_offset=*/0);
}
// This won't work for nested or variable-sized types
+ ARROW_ASSIGN_OR_RAISE(output->buffers[1],
+ ctx->Allocate(batch.length * sizeof(OutValue)));
std::memcpy(output->buffers[1]->mutable_data(),
input.buffers[1]->data() + (input.offset * sizeof(OutValue)),
batch.length * sizeof(OutValue));
@@ -676,9 +667,7 @@ struct ScalarMinMax {
// AND together the validity buffers of all arrays
ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
bool first = true;
- for (const auto& arg : batch.values) {
- if (!arg.is_array()) continue;
- auto arr = arg.array();
+ for (const auto& arr : arrays) {
if (!arr->buffers[0]) continue;
if (first) {
::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset,
@@ -693,15 +682,14 @@ struct ScalarMinMax {
}
}
}
- const size_t start_at = any_scalar ? first_array_index : (first_array_index + 1);
- for (size_t i = start_at; i < batch.values.size(); i++) {
- if (!batch.values[i].is_array()) continue;
+ arrays.pop_back();
+
+ for (const auto& array : arrays) {
OutputArrayWriter writer(out->mutable_array());
- const ArrayData& arr = *batch.values[i].array();
ArrayIterator out_it(*output);
int64_t index = 0;
VisitArrayValuesInline(
- arr,
+ *array,
[&](OutValue value) {
auto u = out_it();
if (!output->buffers[0] ||
@@ -718,12 +706,12 @@ struct ScalarMinMax {
index++;
out_it();
});
- // When not skipping nulls, we pre-compute the validity buffer
+ // When skipping nulls, we incrementally compute the validity buffer
if (options.skip_nulls && output->buffers[0]) {
- if (arr.buffers[0]) {
+ if (array->buffers[0]) {
::arrow::internal::BitmapOr(
- output->buffers[0]->data(), /*left_offset=*/0, arr.buffers[0]->data(),
- /*right_offset=*/arr.offset, batch.length, /*out_offset=*/0,
+ output->buffers[0]->data(), /*left_offset=*/0, array->buffers[0]->data(),
+ /*right_offset=*/array->offset, batch.length, /*out_offset=*/0,
output->buffers[0]->mutable_data());
} else {
output->buffers[0] = nullptr;
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 0fe3ac89ce0..8c9618a8923 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -330,7 +330,8 @@ class TestVarArgsArithmetic : public TestBase {
Datum array(const std::string& value) { return ArrayFromJSON(type_singleton(), value); }
Datum Eval(VarArgsFunction func, const std::vector& args) {
- EXPECT_OK_AND_ASSIGN(auto actual, func(args, min_max_options_, nullptr));
+ EXPECT_OK_AND_ASSIGN(auto actual,
+ func(args, element_wise_aggregate_options_, nullptr));
if (actual.is_array()) {
auto arr = actual.make_array();
ARROW_EXPECT_OK(arr->ValidateFull());
@@ -367,7 +368,7 @@ class TestVarArgsArithmetic : public TestBase {
}
EqualOptions equal_options_ = EqualOptions::Defaults();
- ElementWiseAggregateOptions min_max_options_;
+ ElementWiseAggregateOptions element_wise_aggregate_options_;
};
template
@@ -1306,7 +1307,7 @@ TYPED_TEST(TestVarArgsArithmeticNumeric, Minimum) {
{this->scalar("null"), this->array("[null, null, null, null]")});
// Test null handling
- this->min_max_options_.skip_nulls = false;
+ this->element_wise_aggregate_options_.skip_nulls = false;
this->AssertNullScalar(Minimum, {this->scalar("null"), this->scalar("null")});
this->AssertNullScalar(Minimum, {this->scalar("0"), this->scalar("null")});
@@ -1409,7 +1410,7 @@ TYPED_TEST(TestVarArgsArithmeticNumeric, Maximum) {
{this->scalar("null"), this->array("[null, null, null, null]")});
// Test null handling
- this->min_max_options_.skip_nulls = false;
+ this->element_wise_aggregate_options_.skip_nulls = false;
this->AssertNullScalar(Maximum, {this->scalar("null"), this->scalar("null")});
this->AssertNullScalar(Maximum, {this->scalar("0"), this->scalar("null")});
From 28133cc666082aa113537be7819d35980150dade Mon Sep 17 00:00:00 2001
From: David Li
Date: Tue, 25 May 2021 17:55:29 -0400
Subject: [PATCH 08/14] ARROW-12751: [C++] Test casting to common timestamp
type
---
.../compute/kernels/scalar_arithmetic.cc | 3 ++
.../compute/kernels/scalar_arithmetic_test.cc | 31 +++++++++++++++++++
2 files changed, 34 insertions(+)
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index 14811e68c2f..ec7aefece47 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -493,6 +493,9 @@ struct ArithmeticVarArgsFunction : ScalarFunction {
if (auto type = CommonNumeric(*values)) {
ReplaceTypes(type, values);
}
+ if (auto type = CommonTimestamp(*values)) {
+ ReplaceTypes(type, values);
+ }
if (auto kernel = DispatchExactImpl(this, *values)) return kernel;
return arrow::compute::detail::NoMatchingKernel(this, *values);
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 8c9618a8923..35315f5114b 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -1461,5 +1461,36 @@ TYPED_TEST(TestVarArgsArithmeticParametricTemporal, Maximum) {
{this->array("[1, null, 3, 4]"), this->array("[2, 2, null, 2]")});
}
+TEST(TestMaximumMinimum, CommonTimestamp) {
+ {
+ auto t1 = std::make_shared(TimeUnit::SECOND);
+ auto t2 = std::make_shared(TimeUnit::MILLI);
+ auto expected = MakeScalar(t2, 1000).ValueOrDie();
+ ASSERT_OK_AND_ASSIGN(auto actual,
+ Minimum({Datum(MakeScalar(t1, 1).ValueOrDie()),
+ Datum(MakeScalar(t2, 12000).ValueOrDie())}));
+ AssertScalarsEqual(*expected, *actual.scalar(), /*verbose=*/true);
+ }
+ {
+ auto t1 = std::make_shared();
+ auto t2 = std::make_shared(TimeUnit::SECOND);
+ auto expected = MakeScalar(t2, 86401).ValueOrDie();
+ ASSERT_OK_AND_ASSIGN(auto actual,
+ Maximum({Datum(MakeScalar(t1, 1).ValueOrDie()),
+ Datum(MakeScalar(t2, 86401).ValueOrDie())}));
+ AssertScalarsEqual(*expected, *actual.scalar(), /*verbose=*/true);
+ }
+ {
+ auto t1 = std::make_shared();
+ auto t2 = std::make_shared();
+ auto t3 = std::make_shared(TimeUnit::SECOND);
+ auto expected = MakeScalar(t3, 86400).ValueOrDie();
+ ASSERT_OK_AND_ASSIGN(auto actual,
+ Minimum({Datum(MakeScalar(t1, 1).ValueOrDie()),
+ Datum(MakeScalar(t2, 2 * 86400000).ValueOrDie())}));
+ AssertScalarsEqual(*expected, *actual.scalar(), /*verbose=*/true);
+ }
+}
+
} // namespace compute
} // namespace arrow
From ebe0b1c8c4326cbdc17b6104568a412c3250f0b4 Mon Sep 17 00:00:00 2001
From: David Li
Date: Fri, 28 May 2021 09:41:47 -0400
Subject: [PATCH 09/14] ARROW-12751: [C++] Fix review feedback
---
.../compute/kernels/scalar_arithmetic.cc | 19 ++++----
.../compute/kernels/scalar_arithmetic_test.cc | 47 ++++++++-----------
docs/source/cpp/compute.rst | 10 ++--
3 files changed, 34 insertions(+), 42 deletions(-)
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
index ec7aefece47..d413d549c28 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc
@@ -492,8 +492,7 @@ struct ArithmeticVarArgsFunction : ScalarFunction {
if (auto type = CommonNumeric(*values)) {
ReplaceTypes(type, values);
- }
- if (auto type = CommonTimestamp(*values)) {
+ } else if (auto type = CommonTimestamp(*values)) {
ReplaceTypes(type, values);
}
@@ -619,6 +618,7 @@ struct ScalarMinMax {
arrays.push_back(arg.array());
}
+ bool can_recycle = false;
if (scalar_count > 0) {
ARROW_ASSIGN_OR_RAISE(std::shared_ptr temp_scalar,
MakeScalar(out->type(), 0));
@@ -628,6 +628,7 @@ struct ScalarMinMax {
ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*temp_scalar, batch.length,
ctx->memory_pool()));
arrays.push_back(array->data());
+ can_recycle = true;
} else if (!options.skip_nulls) {
// Abort early
ARROW_ASSIGN_OR_RAISE(auto array, MakeArrayFromScalar(*temp_scalar, batch.length,
@@ -644,11 +645,11 @@ struct ScalarMinMax {
}
// Two or more arrays to consider
- if (scalar_count > 0) {
+ if (can_recycle) {
// We allocated the last array from a scalar: recycle it as the output
*output = *arrays.back();
} else {
- // Copy last array argument to output array
+ // Copy last array to output array
const ArrayData& input = *arrays.back();
if (options.skip_nulls && input.buffers[0]) {
// Don't copy the bitmap if !options.skip_nulls since we'll precompute it later
@@ -668,15 +669,13 @@ struct ScalarMinMax {
if (!options.skip_nulls) {
// We can precompute the validity buffer in this case
// AND together the validity buffers of all arrays
- ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
- bool first = true;
for (const auto& arr : arrays) {
- if (!arr->buffers[0]) continue;
- if (first) {
+ if (!arr->MayHaveNulls()) continue;
+ if (!output->buffers[0]) {
+ ARROW_ASSIGN_OR_RAISE(output->buffers[0], ctx->AllocateBitmap(batch.length));
::arrow::internal::CopyBitmap(arr->buffers[0]->data(), arr->offset,
batch.length, output->buffers[0]->mutable_data(),
/*dest_offset=*/0);
- first = false;
} else {
::arrow::internal::BitmapAnd(output->buffers[0]->data(), /*left_offset=*/0,
arr->buffers[0]->data(), arr->offset, batch.length,
@@ -721,7 +720,7 @@ struct ScalarMinMax {
}
}
}
- output->null_count = -1;
+ output->null_count = output->buffers[0] ? -1 : 0;
return Status::OK();
}
};
diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
index 35315f5114b..59ebb4408a3 100644
--- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
+++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc
@@ -17,7 +17,6 @@
#include