From 23380f6c52f860c19fc557be5afcabe8a8842ed3 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Thu, 9 Dec 2021 10:31:04 +0100 Subject: [PATCH 1/9] Add IsMonotonic vector function --- cpp/src/arrow/CMakeLists.txt | 1 + cpp/src/arrow/compute/api_aggregate.h | 1 - cpp/src/arrow/compute/api_vector.cc | 33 ++++ cpp/src/arrow/compute/api_vector.h | 50 ++++++ cpp/src/arrow/compute/kernels/CMakeLists.txt | 1 + .../compute/kernels/vector_is_monotonic.cc | 148 ++++++++++++++++++ .../kernels/vector_is_monotonic_test.cc | 106 +++++++++++++ cpp/src/arrow/compute/registry.cc | 1 + cpp/src/arrow/compute/registry_internal.h | 1 + 9 files changed, 341 insertions(+), 1 deletion(-) create mode 100644 cpp/src/arrow/compute/kernels/vector_is_monotonic.cc create mode 100644 cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 5736c557bd0..5e0028cd5db 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -428,6 +428,7 @@ if(ARROW_COMPUTE) compute/kernels/util_internal.cc compute/kernels/vector_array_sort.cc compute/kernels/vector_hash.cc + compute/kernels/vector_is_monotonic.cc compute/kernels/vector_nested.cc compute/kernels/vector_replace.cc compute/kernels/vector_selection.cc diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index c8df81773d4..489a001da9b 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -394,7 +394,6 @@ Result Index(const Datum& value, const IndexOptions& options, ExecContext* ctx = NULLPTR); namespace internal { - /// Internal use only: streaming group identifier. /// Consumes batches of keys and yields batches of the group ids. class ARROW_EXPORT Grouper { diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 95114d8d8a5..8a87b028c8c 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -42,6 +42,7 @@ namespace internal { using compute::DictionaryEncodeOptions; using compute::FilterOptions; +using compute::MonotonicityOrder; using compute::NullPlacement; template <> @@ -88,6 +89,25 @@ struct EnumTraits return ""; } }; +template <> +struct EnumTraits + : BasicEnumTraits { + static std::string name() { return "MonotonicityOrder"; } + static std::string value_name(MonotonicityOrder value) { + switch (value) { + case MonotonicityOrder::Increasing: + return "INCREASING"; + case MonotonicityOrder::StrictlyIncreasing: + return "STRICTLY_INCREASING"; + case MonotonicityOrder::Decreasing: + return "DECREASING"; + case MonotonicityOrder::StrictlyDecreasing: + return "STRICTLY_DECREASING"; + } + return ""; + } +}; } // namespace internal @@ -135,6 +155,9 @@ static auto kPartitionNthOptionsType = GetFunctionOptionsType( DataMember("k", &SelectKOptions::k), DataMember("sort_keys", &SelectKOptions::sort_keys)); +static auto kIsMonotonicOptionsType = GetFunctionOptionsType( + DataMember("order", &IsMonotonicOptions::order)); + } // namespace } // namespace internal @@ -176,6 +199,10 @@ SelectKOptions::SelectKOptions(int64_t k, std::vector sort_keys) sort_keys(std::move(sort_keys)) {} constexpr char SelectKOptions::kTypeName[]; +IsMonotonicOptions::IsMonotonicOptions(MonotonicityOrder order) + : FunctionOptions(internal::kIsMonotonicOptionsType), order(order) {} +constexpr char IsMonotonicOptions::kTypeName[]; + namespace internal { void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType)); @@ -185,6 +212,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kIsMonotonicOptionsType)); } } // namespace internal @@ -280,6 +308,11 @@ Result> ValueCounts(const Datum& value, ExecContext return checked_pointer_cast(result.make_array()); } +Result IsMonotonic(const Datum& value, const IsMonotonicOptions& options, + ExecContext* ctx) { + return CallFunction("is_monotonic", {value}, &options, ctx); +} + // ---------------------------------------------------------------------- // Filter- and take-related selection functions diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 8788d5d160e..ba329dfb2bc 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -188,6 +188,42 @@ class ARROW_EXPORT PartitionNthOptions : public FunctionOptions { NullPlacement null_placement; }; +/// Defines the order used to determine monotonicity. +enum class MonotonicityOrder { + /// Check if elements are monotonically increasing (>=). + Increasing, + /// Check if elements are strictly monotonically increasing (>). + StrictlyIncreasing, + /// Check if elements are monotonically descreasing (<=). + Decreasing, + /// Check if elements are strictly monotonically descreasing (<). + StrictlyDecreasing, +}; + +/// \brief Options for IsMonotonic +class ARROW_EXPORT IsMonotonicOptions : public FunctionOptions { + public: + explicit IsMonotonicOptions(MonotonicityOrder order = MonotonicityOrder::Increasing); + constexpr static char const kTypeName[] = "IsMonotonicOptions"; + static IsMonotonicOptions Increasing() { + return IsMonotonicOptions(MonotonicityOrder::Increasing); + } + static IsMonotonicOptions StrictlyIncreasing() { + return IsMonotonicOptions(MonotonicityOrder::StrictlyIncreasing); + } + static IsMonotonicOptions Decreasing() { + return IsMonotonicOptions(MonotonicityOrder::Decreasing); + } + static IsMonotonicOptions StrictlyDecreasing() { + return IsMonotonicOptions(MonotonicityOrder::StrictlyDecreasing); + } + static IsMonotonicOptions Defaults() { return Increasing(); } + + // Defines the order used to determine monotonicity. + MonotonicityOrder order; + // todo(mb): add option to define how nulls are handled +}; + /// @} /// \brief Filter with a boolean selection filter @@ -494,6 +530,20 @@ Result DictionaryEncode( const DictionaryEncodeOptions& options = DictionaryEncodeOptions::Defaults(), ExecContext* ctx = NULLPTR); +/// \brief Returns if the array contains monotonically increasing/decreasing +/// values. +/// +/// \param[in] data input data. +/// \param[in] options see IsMonotonicOptions for more information. +/// \param[in] ctx the function execution context, optional. +/// \return resulting datum. +/// +/// \since x.0.0 \note API not yet finalized +ARROW_EXPORT +Result IsMonotonic( + const Datum& data, const IsMonotonicOptions& options = IsMonotonicOptions::Defaults(), + ExecContext* ctx = NULLPTR); + // ---------------------------------------------------------------------- // Deprecated functions diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt index 28686a9cafa..6e0f2b826b4 100644 --- a/cpp/src/arrow/compute/kernels/CMakeLists.txt +++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt @@ -46,6 +46,7 @@ add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute") add_arrow_compute_test(vector_test SOURCES vector_hash_test.cc + vector_is_monotonic_test.cc vector_nested_test.cc vector_replace_test.cc vector_selection_test.cc diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc new file mode 100644 index 00000000000..68daef43897 --- /dev/null +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc @@ -0,0 +1,148 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/api_vector.h" +#include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/aggregate_internal.h" +#include "arrow/compute/registry.h" +#include "arrow/util/optional.h" + +#include +#include + +namespace arrow { +namespace compute { +namespace internal { + +namespace { +// ---------------------------------------------------------------------- +// IsMonotonic implementation + +using IsMonotonicState = OptionsWrapper; + +template +Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { + // Not sure if this can fail. + MonotonicityOrder order = IsMonotonicState::Get(ctx).order; + + // Check batch size + if (batch.values.size() != 1) { + return Status::Invalid("IsMonotonic expects a single datum (array) as input"); + } + // Made sure there is at least one input datum. + Datum input = batch[0]; + + // Validate input datum type + // - how much is handled by the type stuff in the registry? + // I think this is unreachable (at least when invoked through the compute registry) + if (!input.is_array()) { + return Status::Invalid("IsMonotonic expects array datum as input"); + } + + // Made sure that the input datum is an array. + const std::shared_ptr& array_data = input.array(); + typename TypeTraits::ArrayType array(array_data); + + // Initial output for early-exists. + *out = Datum(false); + + // Return early if there is just zero elements or one element in the array. + if (array.length() <= 1) { + *out = Datum(true); + return Status::OK(); + } + + // Safety: + // - Made sure that the length is at least 2 above. + for (auto a = array.begin(), b = ++array.begin(); b != array.end(); ++a, ++b) { + auto current = *a; + auto next = *b; + + switch (order) { + case MonotonicityOrder::Increasing: { + if (!(next >= current)) { + return Status::OK(); + } + break; + } + case MonotonicityOrder::StrictlyIncreasing: { + if (!(next > current)) { + return Status::OK(); + } + break; + } + case MonotonicityOrder::Decreasing: { + if (!(next <= current)) { + return Status::OK(); + } + break; + } + case MonotonicityOrder::StrictlyDecreasing: { + if (!(next < current)) { + return Status::OK(); + } + break; + } + } + } + + // At this point the for loop did not early-exit. + *out = Datum(true); + + return Status::OK(); +} + +} // namespace + +const FunctionDoc is_monotonic_doc{ + "Returns if the array contains monotonically increasing/decreasing" + "values", + ("Returns a BooleanScalar(true) only if all the elements of the input array \n" + "are in ascending/descending order.\n" + "The options define the order that is used to determine the monotonicity.\n" + "Always returns BooleanScalar.\n" + "Implemented for arrays with well-ordered element types."), + {"array"}, + "IsMonotonicOptions"}; + +template +Status AddIsMonotonicKernel(VectorFunction* func) { + static const ValueDescr output_type = ValueDescr::Scalar(boolean()); + VectorKernel is_monotonic_base; + is_monotonic_base.init = IsMonotonicState::Init; + is_monotonic_base.can_execute_chunkwise = false; + is_monotonic_base.signature = + KernelSignature::Make({TypeTraits::type_singleton()}, output_type); + is_monotonic_base.exec = IsMonotonic; + return func->AddKernel(is_monotonic_base); +} + +void RegisterVectorIsMonotonic(FunctionRegistry* registry) { + static const IsMonotonicOptions default_options; + auto func = std::make_shared("is_monotonic", Arity::Unary(), + &is_monotonic_doc, &default_options); + + DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + + DCHECK_OK(registry->AddFunction(std::move(func))); +} + +} // namespace internal + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc new file mode 100644 index 00000000000..90e0299cb1b --- /dev/null +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "arrow/compute/api_vector.cc" +#include "arrow/compute/kernels/test_util.h" +#include "arrow/datum.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/matchers.h" +#include "arrow/type.h" + +namespace arrow { +namespace compute { + +static IsMonotonicOptions opt_increasing(MonotonicityOrder::Increasing); +static IsMonotonicOptions opt_strictly_increasing(MonotonicityOrder::StrictlyIncreasing); +static IsMonotonicOptions opt_decreasing(MonotonicityOrder::Decreasing); +static IsMonotonicOptions opt_strictly_decreasing(MonotonicityOrder::StrictlyDecreasing); + +void CheckIsMonotonic(Datum input, bool increasing, bool strictly_increasing, + bool decreasing, bool strictly_decreasing) { + CheckVectorUnary("is_monotonic", input, Datum(increasing), &opt_increasing); + CheckVectorUnary("is_monotonic", input, Datum(strictly_increasing), + &opt_strictly_increasing); + CheckVectorUnary("is_monotonic", input, Datum(decreasing), &opt_decreasing); + CheckVectorUnary("is_monotonic", input, Datum(strictly_decreasing), + &opt_strictly_decreasing); +} + +TEST(TestIsMonotonicKernel, VectorFunction) { + // Primitive arrays + + // These tests should early exit (based on length). + CheckIsMonotonic(ArrayFromJSON(int8(), "[]"), true, true, true, true); + CheckIsMonotonic(ArrayFromJSON(int8(), "[null]"), true, true, true, true); + CheckIsMonotonic(ArrayFromJSON(int8(), "[0]"), true, true, true, true); + + // Both monotonic increasing and decreasing when all values are the same. + CheckIsMonotonic(ArrayFromJSON(int8(), "[0, 0, 0, 0]"), true, false, true, false); + + // - nulls are ignored --> this will be the default + // - allow nulls before any number + // - allow consider nulls after any number + + // ---- don't do these + // - use std::optional logic + // - anything with a null returns false + + // Using std::optional logic here for nulls: + // - lhs is considered equal to rhs if, and only if, both lhs and rhs do not contain a + // value. + // - lhs is considered less than rhs if, and only if, rhs contains a value and lhs does + // not. + CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 0, 0, 0]"), true, false, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[0, 0, 0, null]"), false, false, true, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[0, null, 0, 0]"), false, false, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, null]"), true, false, true, false); + + // Monotonic (strictly) increasing + CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, 3, 4]"), true, false, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 1, 1, 4]"), true, false, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, null, 4]"), false, false, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, 3, null]"), false, false, false, false); + + CheckIsMonotonic(ArrayFromJSON(int8(), "[-1, 2, 3, 4]"), true, true, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 2, 3, 4]"), true, true, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, 3, 4]"), true, false, false, + false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[1, null, 3, 4]"), false, false, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 2, 3, null]"), false, false, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 2, 1, 2]"), false, false, false, false); + + // Monotonic (strictly) decreasing + CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, 2, 1]"), false, false, true, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, 2, null]"), false, false, true, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, null, 1]"), false, false, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 4, 2, 1]"), false, false, false, false); + + CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 2, 1]"), false, false, true, true); + CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 2, null]"), false, false, true, true); + CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, null, null]"), false, false, true, + false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[4, null, 2, 1]"), false, false, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 3, 2, 1]"), false, false, false, false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, 2, 1]"), false, false, false, + false); + CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 4, 3]"), false, false, false, false); +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc index c6455874dbc..b2e31520aff 100644 --- a/cpp/src/arrow/compute/registry.cc +++ b/cpp/src/arrow/compute/registry.cc @@ -173,6 +173,7 @@ static std::unique_ptr CreateBuiltInRegistry() { // Vector functions RegisterVectorArraySort(registry.get()); RegisterVectorHash(registry.get()); + RegisterVectorIsMonotonic(registry.get()); RegisterVectorNested(registry.get()); RegisterVectorReplace(registry.get()); RegisterVectorSelection(registry.get()); diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h index 98f61185f7b..5d5a2434a62 100644 --- a/cpp/src/arrow/compute/registry_internal.h +++ b/cpp/src/arrow/compute/registry_internal.h @@ -42,6 +42,7 @@ void RegisterScalarOptions(FunctionRegistry* registry); // Vector functions void RegisterVectorArraySort(FunctionRegistry* registry); void RegisterVectorHash(FunctionRegistry* registry); +void RegisterVectorIsMonotonic(FunctionRegistry* registry); void RegisterVectorNested(FunctionRegistry* registry); void RegisterVectorReplace(FunctionRegistry* registry); void RegisterVectorSelection(FunctionRegistry* registry); From 52ddd129a16cbdec7e29742644595e082e9c03b6 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Fri, 10 Dec 2021 11:32:25 +0100 Subject: [PATCH 2/9] Return StructScalar from IsMonotonic function --- cpp/src/arrow/compare.cc | 17 +++ cpp/src/arrow/compare.h | 12 +- cpp/src/arrow/compute/api_vector.cc | 38 +++--- cpp/src/arrow/compute/api_vector.h | 59 ++++----- cpp/src/arrow/compute/function_internal.h | 23 ++++ .../compute/kernels/vector_is_monotonic.cc | 122 +++++++++++------ .../kernels/vector_is_monotonic_test.cc | 125 +++++++++--------- 7 files changed, 242 insertions(+), 154 deletions(-) diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index d4b94e53838..450c3acd3b5 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,22 @@ using internal::OptionalBitmapEquals; // ---------------------------------------------------------------------- // Public method implementations +bool EqualOptions::Equals(const EqualOptions& other) const { + return nans_equal_ == other.nans_equal() && atol_ == other.atol() && + diff_sink_ == other.diff_sink(); +} + +std::string EqualOptions::ToString() const { + std::stringstream ss; + if (nans_equal_) { + ss << "NaNs EQUAL "; + } else { + ss << "NaNs NOT EQUAL "; + } + ss << "atol: " << atol_; + return ss.str(); +} + namespace { // TODO also handle HALF_FLOAT NaNs diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h index 6769b23867b..abee230ee31 100644 --- a/cpp/src/arrow/compare.h +++ b/cpp/src/arrow/compare.h @@ -22,6 +22,7 @@ #include #include +#include "arrow/util/compare.h" #include "arrow/util/macros.h" #include "arrow/util/visibility.h" @@ -36,7 +37,7 @@ struct Scalar; static constexpr double kDefaultAbsoluteTolerance = 1E-5; /// A container of options for equality comparisons -class EqualOptions { +class EqualOptions : public util::EqualityComparable { public: /// Whether or not NaNs are considered equal. bool nans_equal() const { return nans_equal_; } @@ -71,6 +72,15 @@ class EqualOptions { return res; } + /// \brief Return a string representation of this EqualOptions suitable for printing. + std::string ToString() const; + + using util::EqualityComparable::Equals; + using util::EqualityComparable::operator==; + using util::EqualityComparable::operator!=; + /// \brief Return true if both EqualOptions are the same. + bool Equals(const EqualOptions& other) const; + static EqualOptions Defaults() { return {}; } protected: diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 8a87b028c8c..7f87abe4989 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -42,7 +42,7 @@ namespace internal { using compute::DictionaryEncodeOptions; using compute::FilterOptions; -using compute::MonotonicityOrder; +using compute::IsMonotonicOptions; using compute::NullPlacement; template <> @@ -90,20 +90,20 @@ struct EnumTraits } }; template <> -struct EnumTraits - : BasicEnumTraits { - static std::string name() { return "MonotonicityOrder"; } - static std::string value_name(MonotonicityOrder value) { +struct EnumTraits + : BasicEnumTraits { + static std::string name() { return "IsMonotonicOptions::NullHandling"; } + static std::string value_name(IsMonotonicOptions::NullHandling value) { switch (value) { - case MonotonicityOrder::Increasing: - return "INCREASING"; - case MonotonicityOrder::StrictlyIncreasing: - return "STRICTLY_INCREASING"; - case MonotonicityOrder::Decreasing: - return "DECREASING"; - case MonotonicityOrder::StrictlyDecreasing: - return "STRICTLY_DECREASING"; + case IsMonotonicOptions::NullHandling::IGNORE: + return "IGNORE"; + case IsMonotonicOptions::NullHandling::MIN_INF: + return "MIN_INF"; + case IsMonotonicOptions::NullHandling::INF: + return "INF"; } return ""; } @@ -156,7 +156,8 @@ static auto kSelectKOptionsType = GetFunctionOptionsType( DataMember("k", &SelectKOptions::k), DataMember("sort_keys", &SelectKOptions::sort_keys)); static auto kIsMonotonicOptionsType = GetFunctionOptionsType( - DataMember("order", &IsMonotonicOptions::order)); + DataMember("null_handling", &IsMonotonicOptions::null_handling), + DataMember("equal_options", &IsMonotonicOptions::equal_options)); } // namespace } // namespace internal @@ -199,8 +200,11 @@ SelectKOptions::SelectKOptions(int64_t k, std::vector sort_keys) sort_keys(std::move(sort_keys)) {} constexpr char SelectKOptions::kTypeName[]; -IsMonotonicOptions::IsMonotonicOptions(MonotonicityOrder order) - : FunctionOptions(internal::kIsMonotonicOptionsType), order(order) {} +IsMonotonicOptions::IsMonotonicOptions(IsMonotonicOptions::NullHandling null_handling, + EqualOptions equal_options) + : FunctionOptions(internal::kIsMonotonicOptionsType), + null_handling(null_handling), + equal_options(equal_options) {} constexpr char IsMonotonicOptions::kTypeName[]; namespace internal { diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index ba329dfb2bc..7bbaa02ed6c 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -188,40 +188,27 @@ class ARROW_EXPORT PartitionNthOptions : public FunctionOptions { NullPlacement null_placement; }; -/// Defines the order used to determine monotonicity. -enum class MonotonicityOrder { - /// Check if elements are monotonically increasing (>=). - Increasing, - /// Check if elements are strictly monotonically increasing (>). - StrictlyIncreasing, - /// Check if elements are monotonically descreasing (<=). - Decreasing, - /// Check if elements are strictly monotonically descreasing (<). - StrictlyDecreasing, -}; - /// \brief Options for IsMonotonic class ARROW_EXPORT IsMonotonicOptions : public FunctionOptions { public: - explicit IsMonotonicOptions(MonotonicityOrder order = MonotonicityOrder::Increasing); + enum NullHandling { + // Ignore nulls. + IGNORE, + // Use min value of element type as the value of nulls. + MIN_INF, + // Use max value of element type as the value of nulls. + INF, + }; + + explicit IsMonotonicOptions(NullHandling null_handling = IGNORE, + EqualOptions equal_options = EqualOptions::Defaults()); constexpr static char const kTypeName[] = "IsMonotonicOptions"; - static IsMonotonicOptions Increasing() { - return IsMonotonicOptions(MonotonicityOrder::Increasing); - } - static IsMonotonicOptions StrictlyIncreasing() { - return IsMonotonicOptions(MonotonicityOrder::StrictlyIncreasing); - } - static IsMonotonicOptions Decreasing() { - return IsMonotonicOptions(MonotonicityOrder::Decreasing); - } - static IsMonotonicOptions StrictlyDecreasing() { - return IsMonotonicOptions(MonotonicityOrder::StrictlyDecreasing); - } - static IsMonotonicOptions Defaults() { return Increasing(); } + static IsMonotonicOptions Defaults() { return IsMonotonicOptions(); } - // Defines the order used to determine monotonicity. - MonotonicityOrder order; - // todo(mb): add option to define how nulls are handled + // Define how nulls are handled. + NullHandling null_handling; + // Options for equality comparisons. Used for floats. + EqualOptions equal_options; }; /// @} @@ -530,13 +517,21 @@ Result DictionaryEncode( const DictionaryEncodeOptions& options = DictionaryEncodeOptions::Defaults(), ExecContext* ctx = NULLPTR); -/// \brief Returns if the array contains monotonically increasing/decreasing -/// values. +/// \brief Returns information about the monotonicity of the elements in an +/// array with well-ordered elements. +/// +/// Returns a struct scalar with type +/// struct< +/// increasing: boolean, +/// strictly_increasing: boolean, +/// decreasing: boolean, +/// strictly_decreasing: boolean +/// > /// /// \param[in] data input data. /// \param[in] options see IsMonotonicOptions for more information. /// \param[in] ctx the function execution context, optional. -/// \return resulting datum. +/// \return resulting datum as a struct scalar. /// /// \since x.0.0 \note API not yet finalized ARROW_EXPORT diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h index 0395ab3e9fb..1e2c951f6e6 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -205,6 +205,8 @@ static inline std::string GenericToString(const std::vector& value) { return ss.str(); } +static inline std::string GenericToString(EqualOptions value) { return value.ToString(); } + template static inline bool GenericEquals(const T& left, const T& right) { return left == right; @@ -366,6 +368,12 @@ static inline Result> GenericToScalar(const Datum& value } } +static inline Result> GenericToScalar(const EqualOptions& value) { + ARROW_ASSIGN_OR_RAISE(auto nans_equal, GenericToScalar(value.nans_equal())); + ARROW_ASSIGN_OR_RAISE(auto atol, GenericToScalar(value.atol())); + return StructScalar::Make({nans_equal, atol}, {"nans_equal", "atol"}); +} + template static inline enable_if_primitive_ctype::ArrowType, Result> GenericFromScalar(const std::shared_ptr& value) { @@ -486,6 +494,21 @@ GenericFromScalar(const std::shared_ptr& value) { return result; } +template +static inline enable_if_same_result GenericFromScalar( + const std::shared_ptr& value) { + if (value->type->id() != Type::STRUCT) { + return Status::Invalid("Expected type STRUCT but got ", value->type->id()); + } + if (!value->is_valid) return Status::Invalid("Got null scalar"); + const auto& holder = checked_cast(*value); + ARROW_ASSIGN_OR_RAISE(auto nans_equal_holder, holder.field("nans_equal")); + ARROW_ASSIGN_OR_RAISE(auto atol_holder, holder.field("atol")); + ARROW_ASSIGN_OR_RAISE(auto nans_equal, GenericFromScalar(nans_equal_holder)); + ARROW_ASSIGN_OR_RAISE(auto atol, GenericFromScalar(atol_holder)); + return EqualOptions().nans_equal(nans_equal).atol(atol); +} + template struct StringifyImpl { template diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc index 68daef43897..c737d8ac855 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc @@ -21,9 +21,6 @@ #include "arrow/compute/registry.h" #include "arrow/util/optional.h" -#include -#include - namespace arrow { namespace compute { namespace internal { @@ -34,16 +31,36 @@ namespace { using IsMonotonicState = OptionsWrapper; +Status IsMonotonicOutput(bool increasing, bool strictly_increasing, bool decreasing, + bool strictly_decreasing, Datum* out) { + ARROW_ASSIGN_OR_RAISE( + *out, + StructScalar::Make( + {std::make_shared(BooleanScalar(increasing)), + std::make_shared(BooleanScalar(strictly_increasing)), + std::make_shared(BooleanScalar(decreasing)), + std::make_shared(BooleanScalar(strictly_decreasing))}, + {"increasing", "strictly_increasing", "decreasing", "strictly_decreasing"})); + return Status::OK(); +} + +// todo(mb): floats template Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { + using ArrayType = typename TypeTraits::ArrayType; + using CType = typename TypeTraits::CType; // Not sure if this can fail. - MonotonicityOrder order = IsMonotonicState::Get(ctx).order; + IsMonotonicOptions::NullHandling null_handling = + IsMonotonicState::Get(ctx).null_handling; + EqualOptions equal_options = IsMonotonicState::Get(ctx).equal_options; // Check batch size if (batch.values.size() != 1) { return Status::Invalid("IsMonotonic expects a single datum (array) as input"); } - // Made sure there is at least one input datum. + + // Safety: + // - Made sure there is at least one input datum. Datum input = batch[0]; // Validate input datum type @@ -55,59 +72,80 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { // Made sure that the input datum is an array. const std::shared_ptr& array_data = input.array(); - typename TypeTraits::ArrayType array(array_data); - - // Initial output for early-exists. - *out = Datum(false); + ArrayType array(array_data); - // Return early if there is just zero elements or one element in the array. - if (array.length() <= 1) { - *out = Datum(true); - return Status::OK(); + // Return early if there are zero elements or one element in the array. + // And return early if there are only nulls. + if (array.length() <= 1 || array.null_count() == array.length()) { + return IsMonotonicOutput(true, true, true, true, out); } + // Set null value based on option. + auto null_value = IsMonotonicOptions::NullHandling::MIN_INF + ? std::numeric_limits::min() + : std::numeric_limits::max(); + + bool inc = true, s_inc = true, dec = true, s_dec = true; + // Safety: // - Made sure that the length is at least 2 above. for (auto a = array.begin(), b = ++array.begin(); b != array.end(); ++a, ++b) { auto current = *a; auto next = *b; - switch (order) { - case MonotonicityOrder::Increasing: { - if (!(next >= current)) { - return Status::OK(); - } - break; + // Handle nulls + if (!current.has_value() || !next.has_value()) { + if (null_handling == IsMonotonicOptions::NullHandling::IGNORE) { + // Skip this iteration. + continue; + } else { + current = current.value_or(null_value); + next = next.value_or(null_value); } - case MonotonicityOrder::StrictlyIncreasing: { - if (!(next > current)) { - return Status::OK(); - } - break; + } + + // Check if increasing. + if (inc) { + if (!(next >= current)) { + inc = false; + // Can't be strictly increasing if not increasing. + s_inc = false; } - case MonotonicityOrder::Decreasing: { - if (!(next <= current)) { - return Status::OK(); - } - break; + } + // Check if strictly increasing. + if (s_inc) { + if (!(next > current)) { + s_inc = false; } - case MonotonicityOrder::StrictlyDecreasing: { - if (!(next < current)) { - return Status::OK(); - } - break; + } + // Check if decreasing. + if (dec) { + if (!(next <= current)) { + dec = false; + // Can't be strictly decreasing if not decreasing. + s_dec = false; + } + } + // Check if strictly decreasing. + if (s_dec) { + if (!(next < current)) { + s_dec = false; } } - } - // At this point the for loop did not early-exit. - *out = Datum(true); + // Early exit if all failed: + if (!inc && !s_inc && !dec && !s_dec) { + break; + } + } - return Status::OK(); + // Output + return IsMonotonicOutput(inc, s_inc, dec, s_dec, out); } } // namespace +// todo(mb): update const FunctionDoc is_monotonic_doc{ "Returns if the array contains monotonically increasing/decreasing" "values", @@ -121,7 +159,12 @@ const FunctionDoc is_monotonic_doc{ template Status AddIsMonotonicKernel(VectorFunction* func) { - static const ValueDescr output_type = ValueDescr::Scalar(boolean()); + static const ValueDescr output_type = ValueDescr::Scalar(struct_({ + field("increasing", boolean()), + field("strictly_increasing", boolean()), + field("decreasing", boolean()), + field("strictly_decreasing", boolean()), + })); VectorKernel is_monotonic_base; is_monotonic_base.init = IsMonotonicState::Init; is_monotonic_base.can_execute_chunkwise = false; @@ -138,6 +181,7 @@ void RegisterVectorIsMonotonic(FunctionRegistry* registry) { DCHECK_OK(AddIsMonotonicKernel(func.get())); DCHECK_OK(AddIsMonotonicKernel(func.get())); + // todo(mb): add other types DCHECK_OK(registry->AddFunction(std::move(func))); } diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc index 90e0299cb1b..94a28a7c32e 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc @@ -18,8 +18,10 @@ #include #include "arrow/compute/api_vector.cc" +#include "arrow/compute/exec.h" #include "arrow/compute/kernels/test_util.h" #include "arrow/datum.h" +#include "arrow/scalar.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" #include "arrow/type.h" @@ -27,19 +29,20 @@ namespace arrow { namespace compute { -static IsMonotonicOptions opt_increasing(MonotonicityOrder::Increasing); -static IsMonotonicOptions opt_strictly_increasing(MonotonicityOrder::StrictlyIncreasing); -static IsMonotonicOptions opt_decreasing(MonotonicityOrder::Decreasing); -static IsMonotonicOptions opt_strictly_decreasing(MonotonicityOrder::StrictlyDecreasing); - void CheckIsMonotonic(Datum input, bool increasing, bool strictly_increasing, - bool decreasing, bool strictly_decreasing) { - CheckVectorUnary("is_monotonic", input, Datum(increasing), &opt_increasing); - CheckVectorUnary("is_monotonic", input, Datum(strictly_increasing), - &opt_strictly_increasing); - CheckVectorUnary("is_monotonic", input, Datum(decreasing), &opt_decreasing); - CheckVectorUnary("is_monotonic", input, Datum(strictly_decreasing), - &opt_strictly_decreasing); + bool decreasing, bool strictly_decreasing, + IsMonotonicOptions options = IsMonotonicOptions::Defaults()) { + ASSERT_OK_AND_ASSIGN(Datum out, CallFunction("is_monotonic", {input}, &options)); + const StructScalar& output = out.scalar_as(); + + auto out_increasing = std::static_pointer_cast(output.value[0]); + ASSERT_EQ(increasing, out_increasing->value); + auto out_strictly_increasing = std::static_pointer_cast(output.value[1]); + ASSERT_EQ(strictly_increasing, out_strictly_increasing->value); + auto out_decreasing = std::static_pointer_cast(output.value[2]); + ASSERT_EQ(decreasing, out_decreasing->value); + auto out_strictly_decreasing = std::static_pointer_cast(output.value[3]); + ASSERT_EQ(strictly_decreasing, out_strictly_decreasing->value); } TEST(TestIsMonotonicKernel, VectorFunction) { @@ -47,59 +50,51 @@ TEST(TestIsMonotonicKernel, VectorFunction) { // These tests should early exit (based on length). CheckIsMonotonic(ArrayFromJSON(int8(), "[]"), true, true, true, true); - CheckIsMonotonic(ArrayFromJSON(int8(), "[null]"), true, true, true, true); - CheckIsMonotonic(ArrayFromJSON(int8(), "[0]"), true, true, true, true); - - // Both monotonic increasing and decreasing when all values are the same. - CheckIsMonotonic(ArrayFromJSON(int8(), "[0, 0, 0, 0]"), true, false, true, false); - - // - nulls are ignored --> this will be the default - // - allow nulls before any number - // - allow consider nulls after any number - - // ---- don't do these - // - use std::optional logic - // - anything with a null returns false - - // Using std::optional logic here for nulls: - // - lhs is considered equal to rhs if, and only if, both lhs and rhs do not contain a - // value. - // - lhs is considered less than rhs if, and only if, rhs contains a value and lhs does - // not. - CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 0, 0, 0]"), true, false, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[0, 0, 0, null]"), false, false, true, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[0, null, 0, 0]"), false, false, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, null]"), true, false, true, false); - - // Monotonic (strictly) increasing - CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, 3, 4]"), true, false, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 1, 1, 4]"), true, false, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, null, 4]"), false, false, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, 3, null]"), false, false, false, false); - - CheckIsMonotonic(ArrayFromJSON(int8(), "[-1, 2, 3, 4]"), true, true, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 2, 3, 4]"), true, true, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, 3, 4]"), true, false, false, - false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[1, null, 3, 4]"), false, false, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 2, 3, null]"), false, false, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 2, 1, 2]"), false, false, false, false); - - // Monotonic (strictly) decreasing - CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, 2, 1]"), false, false, true, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, 2, null]"), false, false, true, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, null, 1]"), false, false, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 4, 2, 1]"), false, false, false, false); - - CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 2, 1]"), false, false, true, true); - CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 2, null]"), false, false, true, true); - CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, null, null]"), false, false, true, - false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[4, null, 2, 1]"), false, false, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 3, 2, 1]"), false, false, false, false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, 2, 1]"), false, false, false, - false); - CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 4, 3]"), false, false, false, false); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[null]"), true, true, true, true); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[0]"), true, true, true, true); + + // // Both monotonic increasing and decreasing when all values are the same. + // CheckIsMonotonic(ArrayFromJSON(int8(), "[0, 0, 0, 0]"), true, false, true, false); + + // CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 0, 0, 0]"), true, false, false, + // false); CheckIsMonotonic(ArrayFromJSON(int8(), "[0, 0, 0, null]"), false, false, + // true, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[0, null, 0, 0]"), false, + // false, false, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, null]"), + // true, false, true, false); + + // // Monotonic (strictly) increasing + // CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, 3, 4]"), true, false, false, false); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 1, 1, 4]"), true, false, false, + // false); CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, null, 4]"), false, false, + // false, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, 3, null]"), false, + // false, false, false); + + // CheckIsMonotonic(ArrayFromJSON(int8(), "[-1, 2, 3, 4]"), true, true, false, false); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 2, 3, 4]"), true, true, false, false); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, 3, 4]"), true, false, false, + // false); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[1, null, 3, 4]"), false, false, false, + // false); CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 2, 3, null]"), false, false, + // false, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 2, 1, 2]"), false, false, + // false, false); + + // // Monotonic (strictly) decreasing + // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, 2, 1]"), false, false, true, false); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, 2, null]"), false, false, true, + // false); CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, null, 1]"), false, false, + // false, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 4, 2, 1]"), false, + // false, false, false); + + // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 2, 1]"), false, false, true, true); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 2, null]"), false, false, true, true); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, null, null]"), false, false, true, + // false); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, null, 2, 1]"), false, false, false, + // false); CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 3, 2, 1]"), false, false, + // false, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, 2, 1]"), false, + // false, false, + // false); + // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 4, 3]"), false, false, false, false); } } // namespace compute From a0ae4957a489e75535677050a63847329afba472 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Mon, 13 Dec 2021 14:03:24 +0100 Subject: [PATCH 3/9] Fix null handling and avoid object slicing --- cpp/src/arrow/compute/api_vector.cc | 12 +- cpp/src/arrow/compute/api_vector.h | 4 +- .../compute/kernels/vector_is_monotonic.cc | 141 +++++++++------ .../kernels/vector_is_monotonic_test.cc | 168 ++++++++++++------ 4 files changed, 216 insertions(+), 109 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 7f87abe4989..5ab5dec59bd 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -93,17 +93,17 @@ template <> struct EnumTraits : BasicEnumTraits { + IsMonotonicOptions::NullHandling::MIN, + IsMonotonicOptions::NullHandling::MAX> { static std::string name() { return "IsMonotonicOptions::NullHandling"; } static std::string value_name(IsMonotonicOptions::NullHandling value) { switch (value) { case IsMonotonicOptions::NullHandling::IGNORE: return "IGNORE"; - case IsMonotonicOptions::NullHandling::MIN_INF: - return "MIN_INF"; - case IsMonotonicOptions::NullHandling::INF: - return "INF"; + case IsMonotonicOptions::NullHandling::MIN: + return "MIN"; + case IsMonotonicOptions::NullHandling::MAX: + return "MAX"; } return ""; } diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 7bbaa02ed6c..1384c998720 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -195,9 +195,9 @@ class ARROW_EXPORT IsMonotonicOptions : public FunctionOptions { // Ignore nulls. IGNORE, // Use min value of element type as the value of nulls. - MIN_INF, + MIN, // Use max value of element type as the value of nulls. - INF, + MAX, }; explicit IsMonotonicOptions(NullHandling null_handling = IGNORE, diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc index c737d8ac855..1a04d07f0aa 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc @@ -34,22 +34,20 @@ using IsMonotonicState = OptionsWrapper; Status IsMonotonicOutput(bool increasing, bool strictly_increasing, bool decreasing, bool strictly_decreasing, Datum* out) { ARROW_ASSIGN_OR_RAISE( - *out, - StructScalar::Make( - {std::make_shared(BooleanScalar(increasing)), - std::make_shared(BooleanScalar(strictly_increasing)), - std::make_shared(BooleanScalar(decreasing)), - std::make_shared(BooleanScalar(strictly_decreasing))}, - {"increasing", "strictly_increasing", "decreasing", "strictly_decreasing"})); + *out, StructScalar::Make({std::make_shared(increasing), + std::make_shared(strictly_increasing), + std::make_shared(decreasing), + std::make_shared(strictly_decreasing)}, + {"increasing", "strictly_increasing", "decreasing", + "strictly_decreasing"})); return Status::OK(); } -// todo(mb): floats template Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { using ArrayType = typename TypeTraits::ArrayType; using CType = typename TypeTraits::CType; - // Not sure if this can fail. + IsMonotonicOptions::NullHandling null_handling = IsMonotonicState::Get(ctx).null_handling; EqualOptions equal_options = IsMonotonicState::Get(ctx).equal_options; @@ -63,96 +61,111 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { // - Made sure there is at least one input datum. Datum input = batch[0]; - // Validate input datum type - // - how much is handled by the type stuff in the registry? - // I think this is unreachable (at least when invoked through the compute registry) + // Validate input datum type (useful for direct invocation only). if (!input.is_array()) { return Status::Invalid("IsMonotonic expects array datum as input"); } - // Made sure that the input datum is an array. + // Safety: + // - Made sure that the input datum is an array. const std::shared_ptr& array_data = input.array(); ArrayType array(array_data); // Return early if there are zero elements or one element in the array. // And return early if there are only nulls. if (array.length() <= 1 || array.null_count() == array.length()) { - return IsMonotonicOutput(true, true, true, true, out); + // It is strictly increasing if there are zero or one elements or when nulls are + // ignored. + bool strictly = + array.length() <= 1 || null_handling == IsMonotonicOptions::NullHandling::IGNORE; + return IsMonotonicOutput(true, strictly, true, strictly, out); } // Set null value based on option. - auto null_value = IsMonotonicOptions::NullHandling::MIN_INF - ? std::numeric_limits::min() - : std::numeric_limits::max(); + const CType null_value = null_handling == IsMonotonicOptions::NullHandling::MIN + ? std::numeric_limits::min() + : std::numeric_limits::max(); - bool inc = true, s_inc = true, dec = true, s_dec = true; + bool increasing = true, strictly_increasing = true, decreasing = true, + strictly_decreasing = true; // Safety: // - Made sure that the length is at least 2 above. - for (auto a = array.begin(), b = ++array.begin(); b != array.end(); ++a, ++b) { + for (auto a = array.begin(), b = ++array.begin(); b != array.end();) { auto current = *a; auto next = *b; - // Handle nulls - if (!current.has_value() || !next.has_value()) { - if (null_handling == IsMonotonicOptions::NullHandling::IGNORE) { - // Skip this iteration. + // Handle nulls. + if (null_handling == IsMonotonicOptions::NullHandling::IGNORE) { + // Forward both iterators to search for a non-null value. The loop exit + // condition prevents reading past the end. + if (!current.has_value()) { + ++a; + ++b; continue; - } else { - current = current.value_or(null_value); - next = next.value_or(null_value); } + // Once we have a value for current we should also make sure that next has a value. + // The loop exit condition prevents reading past the end. + if (!next.has_value()) { + ++b; + continue; + } + } else { + // Based on the function options set null values to min/max. + current = current.value_or(null_value); + next = next.value_or(null_value); } - // Check if increasing. - if (inc) { + if (increasing) { if (!(next >= current)) { - inc = false; + increasing = false; // Can't be strictly increasing if not increasing. - s_inc = false; + strictly_increasing = false; } } - // Check if strictly increasing. - if (s_inc) { + if (strictly_increasing) { if (!(next > current)) { - s_inc = false; + strictly_increasing = false; } } - // Check if decreasing. - if (dec) { + if (decreasing) { if (!(next <= current)) { - dec = false; + decreasing = false; // Can't be strictly decreasing if not decreasing. - s_dec = false; + strictly_decreasing = false; } } // Check if strictly decreasing. - if (s_dec) { + if (strictly_decreasing) { if (!(next < current)) { - s_dec = false; + strictly_decreasing = false; } } // Early exit if all failed: - if (!inc && !s_inc && !dec && !s_dec) { + if (!increasing && !strictly_increasing && !decreasing && !strictly_decreasing) { break; + } else { + ++a; + ++b; } } // Output - return IsMonotonicOutput(inc, s_inc, dec, s_dec, out); + return IsMonotonicOutput(increasing, strictly_increasing, decreasing, + strictly_decreasing, out); } } // namespace -// todo(mb): update const FunctionDoc is_monotonic_doc{ - "Returns if the array contains monotonically increasing/decreasing" - "values", - ("Returns a BooleanScalar(true) only if all the elements of the input array \n" - "are in ascending/descending order.\n" - "The options define the order that is used to determine the monotonicity.\n" - "Always returns BooleanScalar.\n" + "Returns whether the array contains monotonically (strictly)" + "increasing/decreasing values", + ("Returns a StructScalar indicating whether the values in the array are \n" + "increasing, strictly increasing, decreasing and/or strictly decreasing.\n" + "Output type is struct.\n" + "Null values are ignored by default.\n" "Implemented for arrays with well-ordered element types."), {"array"}, "IsMonotonicOptions"}; @@ -168,8 +181,8 @@ Status AddIsMonotonicKernel(VectorFunction* func) { VectorKernel is_monotonic_base; is_monotonic_base.init = IsMonotonicState::Init; is_monotonic_base.can_execute_chunkwise = false; - is_monotonic_base.signature = - KernelSignature::Make({TypeTraits::type_singleton()}, output_type); + is_monotonic_base.signature = KernelSignature::Make( + {InputType::Array(TypeTraits::type_singleton())}, output_type); is_monotonic_base.exec = IsMonotonic; return func->AddKernel(is_monotonic_base); } @@ -179,9 +192,35 @@ void RegisterVectorIsMonotonic(FunctionRegistry* registry) { auto func = std::make_shared("is_monotonic", Arity::Unary(), &is_monotonic_doc, &default_options); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + + // Signed and unsigned integer types DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); DCHECK_OK(AddIsMonotonicKernel(func.get())); - // todo(mb): add other types + DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + + // Floating point types + // DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + + // Temportal types + // DCHECK_OK(AddIsMonotonicKernel(func.get())); + // DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + // DCHECK_OK(AddIsMonotonicKernel(func.get())); + // DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + + // Decimal types + // DCHECK_OK(AddIsMonotonicKernel(func.get())); + // DCHECK_OK(AddIsMonotonicKernel(func.get())); DCHECK_OK(registry->AddFunction(std::move(func))); } diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc index 94a28a7c32e..14724a3186f 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc @@ -29,9 +29,9 @@ namespace arrow { namespace compute { -void CheckIsMonotonic(Datum input, bool increasing, bool strictly_increasing, - bool decreasing, bool strictly_decreasing, - IsMonotonicOptions options = IsMonotonicOptions::Defaults()) { +void Check(Datum input, bool increasing, bool strictly_increasing, bool decreasing, + bool strictly_decreasing, + const IsMonotonicOptions options = IsMonotonicOptions::Defaults()) { ASSERT_OK_AND_ASSIGN(Datum out, CallFunction("is_monotonic", {input}, &options)); const StructScalar& output = out.scalar_as(); @@ -46,55 +46,123 @@ void CheckIsMonotonic(Datum input, bool increasing, bool strictly_increasing, } TEST(TestIsMonotonicKernel, VectorFunction) { - // Primitive arrays + const IsMonotonicOptions min(IsMonotonicOptions::NullHandling::MIN); + const IsMonotonicOptions max(IsMonotonicOptions::NullHandling::MAX); + // Primitive arrays // These tests should early exit (based on length). - CheckIsMonotonic(ArrayFromJSON(int8(), "[]"), true, true, true, true); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[null]"), true, true, true, true); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[0]"), true, true, true, true); - - // // Both monotonic increasing and decreasing when all values are the same. - // CheckIsMonotonic(ArrayFromJSON(int8(), "[0, 0, 0, 0]"), true, false, true, false); - - // CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 0, 0, 0]"), true, false, false, - // false); CheckIsMonotonic(ArrayFromJSON(int8(), "[0, 0, 0, null]"), false, false, - // true, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[0, null, 0, 0]"), false, - // false, false, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, null]"), - // true, false, true, false); - - // // Monotonic (strictly) increasing - // CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, 3, 4]"), true, false, false, false); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 1, 1, 4]"), true, false, false, - // false); CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, null, 4]"), false, false, - // false, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 1, 3, null]"), false, - // false, false, false); - - // CheckIsMonotonic(ArrayFromJSON(int8(), "[-1, 2, 3, 4]"), true, true, false, false); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 2, 3, 4]"), true, true, false, false); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, 3, 4]"), true, false, false, - // false); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[1, null, 3, 4]"), false, false, false, - // false); CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 2, 3, null]"), false, false, - // false, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[1, 2, 1, 2]"), false, false, - // false, false); - - // // Monotonic (strictly) decreasing - // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, 2, 1]"), false, false, true, false); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, 2, null]"), false, false, true, - // false); CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 4, null, 1]"), false, false, - // false, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 4, 2, 1]"), false, - // false, false, false); - - // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 2, 1]"), false, false, true, true); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 2, null]"), false, false, true, true); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, null, null]"), false, false, true, - // false); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, null, 2, 1]"), false, false, false, - // false); CheckIsMonotonic(ArrayFromJSON(int8(), "[null, 3, 2, 1]"), false, false, - // false, false); CheckIsMonotonic(ArrayFromJSON(int8(), "[null, null, 2, 1]"), false, - // false, false, - // false); - // CheckIsMonotonic(ArrayFromJSON(int8(), "[4, 3, 4, 3]"), false, false, false, false); + Check(ArrayFromJSON(int8(), "[]"), true, true, true, true); + Check(ArrayFromJSON(int8(), "[null]"), true, true, true, true); + Check(ArrayFromJSON(int8(), "[null]"), true, true, true, true, min); + Check(ArrayFromJSON(int8(), "[null]"), true, true, true, true, max); + Check(ArrayFromJSON(int8(), "[0]"), true, true, true, true); + + // Both monotonic increasing and decreasing when all values are the same. + Check(ArrayFromJSON(int8(), "[0, 0, 0, 0]"), true, false, true, false); + + Check(ArrayFromJSON(int8(), "[null, 0, 0, 0]"), true, false, true, false); + Check(ArrayFromJSON(int8(), "[null, 0, 0, 0]"), true, false, false, false, min); + Check(ArrayFromJSON(int8(), "[null, 0, 0, 0]"), false, false, true, false, max); + + Check(ArrayFromJSON(int8(), "[0, 0, 0, null]"), true, false, true, false); + Check(ArrayFromJSON(int8(), "[0, 0, 0, null]"), false, false, true, false, min); + Check(ArrayFromJSON(int8(), "[0, 0, 0, null]"), true, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[0, null, 0, 0]"), true, false, true, false); + Check(ArrayFromJSON(int8(), "[0, null, 0, 0]"), false, false, false, false, min); + Check(ArrayFromJSON(int8(), "[0, null, 0, 0]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[null, null, null]"), true, true, true, true); + Check(ArrayFromJSON(int8(), "[null, null, null]"), true, false, true, false, min); + Check(ArrayFromJSON(int8(), "[null, null, null]"), true, false, true, false, max); + + // Monotonic (strictly) increasing + Check(ArrayFromJSON(int8(), "[1, 1, 3, 4]"), true, false, false, false); + + Check(ArrayFromJSON(int8(), "[null, 1, 1, 4]"), true, false, false, false); + Check(ArrayFromJSON(int8(), "[null, 1, 1, 4]"), true, false, false, false, min); + Check(ArrayFromJSON(int8(), "[null, 1, 1, 4]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[1, 1, null, 4]"), true, false, false, false); + Check(ArrayFromJSON(int8(), "[1, 1, null, 4]"), false, false, false, false, min); + Check(ArrayFromJSON(int8(), "[1, 1, null, 4]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[1, 1, 3, null]"), true, false, false, false); + Check(ArrayFromJSON(int8(), "[1, 1, 3, null]"), false, false, false, false, min); + Check(ArrayFromJSON(int8(), "[1, 1, 3, null]"), true, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[-1, 2, 3, 4]"), true, true, false, false); + Check(ArrayFromJSON(int8(), "[-1, 2, 3, 4, 4]"), true, false, false, false); + Check(ArrayFromJSON(int8(), "[-1, 2, 3, 4, 5]"), true, true, false, false); + + Check(ArrayFromJSON(int8(), "[null, 2, 3, 4]"), true, true, false, false); + Check(ArrayFromJSON(int8(), "[null, 2, 3, 4]"), true, true, false, false, min); + Check(ArrayFromJSON(int8(), "[null, 2, 3, 4]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[null, null, 3, 4]"), true, true, false, false); + Check(ArrayFromJSON(int8(), "[null, null, 3, 4]"), true, false, false, false, min); + Check(ArrayFromJSON(int8(), "[null, null, 3, 4]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[1, null, 3, 4]"), true, true, false, false); + Check(ArrayFromJSON(int8(), "[1, null, 3, 4]"), false, false, false, false, min); + Check(ArrayFromJSON(int8(), "[1, null, 3, 4]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[1, 2, 3, null]"), true, true, false, false); + Check(ArrayFromJSON(int8(), "[1, 2, 3, null]"), false, false, false, false, min); + Check(ArrayFromJSON(int8(), "[1, 2, 3, null]"), true, true, false, false, max); + + Check(ArrayFromJSON(int8(), "[1, 2, 1, 2]"), false, false, false, false); + + // Monotonic (strictly) decreasing + Check(ArrayFromJSON(int8(), "[4, 4, 2, 1]"), false, false, true, false); + + Check(ArrayFromJSON(int8(), "[4, 4, 2, null]"), false, false, true, false); + Check(ArrayFromJSON(int8(), "[4, 4, 2, null]"), false, false, true, false, min); + Check(ArrayFromJSON(int8(), "[4, 4, 2, null]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[4, 4, null, 1]"), false, false, true, false); + Check(ArrayFromJSON(int8(), "[4, 4, null, 1]"), false, false, false, false, min); + Check(ArrayFromJSON(int8(), "[4, 4, null, 1]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[null, 4, 2, 1]"), false, false, true, true); + Check(ArrayFromJSON(int8(), "[null, 4, 2, 1]"), false, false, false, false, min); + Check(ArrayFromJSON(int8(), "[null, 4, 2, 1]"), false, false, true, true, max); + + Check(ArrayFromJSON(int8(), "[4, 3, 2, 1]"), false, false, true, true); + Check(ArrayFromJSON(int8(), "[5, 4, 3, 2, 1]"), false, false, true, true); + Check(ArrayFromJSON(int8(), "[5, 4, 3, 2, 2]"), false, false, true, false); + + Check(ArrayFromJSON(int8(), "[4, 3, 2, null]"), false, false, true, true); + Check(ArrayFromJSON(int8(), "[4, 3, 2, null]"), false, false, true, true, min); + Check(ArrayFromJSON(int8(), "[4, 3, 2, null]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[4, 3, null, null]"), false, false, true, true); + Check(ArrayFromJSON(int8(), "[4, 3, null, null]"), false, false, true, false, min); + Check(ArrayFromJSON(int8(), "[4, 3, null, null]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[4, null, 2, 1]"), false, false, true, true); + Check(ArrayFromJSON(int8(), "[4, null, 2, 1]"), false, false, false, false, min); + Check(ArrayFromJSON(int8(), "[4, null, 2, 1]"), false, false, false, false, max); + + Check(ArrayFromJSON(int8(), "[null, 3, 2, 1]"), false, false, true, true); + Check(ArrayFromJSON(int8(), "[null, 3, 2, 1]"), false, false, false, false, min); + Check(ArrayFromJSON(int8(), "[null, 3, 2, 1]"), false, false, true, true, max); + + Check(ArrayFromJSON(int8(), "[null, null, 2, 1]"), false, false, true, true); + Check(ArrayFromJSON(int8(), "[null, null, 2, 1]"), false, false, false, false, min); + Check(ArrayFromJSON(int8(), "[null, null, 2, 1]"), false, false, true, false, max); + + Check(ArrayFromJSON(int8(), "[4, 3, 4, 3]"), false, false, false, false); + + // Other types + // Boolean + Check(ArrayFromJSON(boolean(), "[true, true, false]"), false, false, true, false); + Check(ArrayFromJSON(boolean(), "[true, false]"), false, false, true, true); + // Floating point + // todo(mb) + // Temporal + Check(ArrayFromJSON(date32(), "[1, 2, 3, 4, 4]"), true, false, false, false); + Check(ArrayFromJSON(date64(), "[0, 0, 3, 4, 4]"), true, false, false, false); } } // namespace compute From dd66eb3d4ce0d0b767c450c57a5b73cc7bd82c98 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Tue, 14 Dec 2021 10:18:27 +0100 Subject: [PATCH 4/9] Update comment to allow doxygen to detect docstrings Co-authored-by: Benjamin Kietzman --- cpp/src/arrow/compute/api_vector.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 1384c998720..1355eeb0dca 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -192,7 +192,7 @@ class ARROW_EXPORT PartitionNthOptions : public FunctionOptions { class ARROW_EXPORT IsMonotonicOptions : public FunctionOptions { public: enum NullHandling { - // Ignore nulls. + /// Ignore nulls. IGNORE, // Use min value of element type as the value of nulls. MIN, From 347e1027e599539efe6110c4aadff61e765e3d3d Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Tue, 14 Dec 2021 18:28:41 +0100 Subject: [PATCH 5/9] Modify function options to handle floating point numbers --- cpp/src/arrow/compare.cc | 17 -- cpp/src/arrow/compare.h | 12 +- cpp/src/arrow/compute/api_aggregate.h | 1 + cpp/src/arrow/compute/api_vector.cc | 16 +- cpp/src/arrow/compute/api_vector.h | 18 ++- cpp/src/arrow/compute/function_internal.h | 23 --- .../compute/kernels/vector_is_monotonic.cc | 150 +++++++++++++----- .../kernels/vector_is_monotonic_test.cc | 13 +- cpp/src/arrow/type_traits.h | 3 + 9 files changed, 146 insertions(+), 107 deletions(-) diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc index 450c3acd3b5..d4b94e53838 100644 --- a/cpp/src/arrow/compare.cc +++ b/cpp/src/arrow/compare.cc @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -60,22 +59,6 @@ using internal::OptionalBitmapEquals; // ---------------------------------------------------------------------- // Public method implementations -bool EqualOptions::Equals(const EqualOptions& other) const { - return nans_equal_ == other.nans_equal() && atol_ == other.atol() && - diff_sink_ == other.diff_sink(); -} - -std::string EqualOptions::ToString() const { - std::stringstream ss; - if (nans_equal_) { - ss << "NaNs EQUAL "; - } else { - ss << "NaNs NOT EQUAL "; - } - ss << "atol: " << atol_; - return ss.str(); -} - namespace { // TODO also handle HALF_FLOAT NaNs diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h index abee230ee31..6769b23867b 100644 --- a/cpp/src/arrow/compare.h +++ b/cpp/src/arrow/compare.h @@ -22,7 +22,6 @@ #include #include -#include "arrow/util/compare.h" #include "arrow/util/macros.h" #include "arrow/util/visibility.h" @@ -37,7 +36,7 @@ struct Scalar; static constexpr double kDefaultAbsoluteTolerance = 1E-5; /// A container of options for equality comparisons -class EqualOptions : public util::EqualityComparable { +class EqualOptions { public: /// Whether or not NaNs are considered equal. bool nans_equal() const { return nans_equal_; } @@ -72,15 +71,6 @@ class EqualOptions : public util::EqualityComparable { return res; } - /// \brief Return a string representation of this EqualOptions suitable for printing. - std::string ToString() const; - - using util::EqualityComparable::Equals; - using util::EqualityComparable::operator==; - using util::EqualityComparable::operator!=; - /// \brief Return true if both EqualOptions are the same. - bool Equals(const EqualOptions& other) const; - static EqualOptions Defaults() { return {}; } protected: diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 489a001da9b..c8df81773d4 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -394,6 +394,7 @@ Result Index(const Datum& value, const IndexOptions& options, ExecContext* ctx = NULLPTR); namespace internal { + /// Internal use only: streaming group identifier. /// Consumes batches of keys and yields batches of the group ids. class ARROW_EXPORT Grouper { diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 5ab5dec59bd..6cf39dc765d 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -91,10 +91,9 @@ struct EnumTraits }; template <> struct EnumTraits - : BasicEnumTraits { + : BasicEnumTraits< + IsMonotonicOptions::NullHandling, IsMonotonicOptions::NullHandling::IGNORE, + IsMonotonicOptions::NullHandling::MIN, IsMonotonicOptions::NullHandling::MAX> { static std::string name() { return "IsMonotonicOptions::NullHandling"; } static std::string value_name(IsMonotonicOptions::NullHandling value) { switch (value) { @@ -157,7 +156,8 @@ static auto kSelectKOptionsType = GetFunctionOptionsType( DataMember("sort_keys", &SelectKOptions::sort_keys)); static auto kIsMonotonicOptionsType = GetFunctionOptionsType( DataMember("null_handling", &IsMonotonicOptions::null_handling), - DataMember("equal_options", &IsMonotonicOptions::equal_options)); + DataMember("floating_approximate", &IsMonotonicOptions::floating_approximate), + DataMember("epsilon", &IsMonotonicOptions::epsilon)); } // namespace } // namespace internal @@ -201,10 +201,12 @@ SelectKOptions::SelectKOptions(int64_t k, std::vector sort_keys) constexpr char SelectKOptions::kTypeName[]; IsMonotonicOptions::IsMonotonicOptions(IsMonotonicOptions::NullHandling null_handling, - EqualOptions equal_options) + bool floating_approximate, + double epsilon) : FunctionOptions(internal::kIsMonotonicOptionsType), null_handling(null_handling), - equal_options(equal_options) {} + floating_approximate(floating_approximate), + epsilon(epsilon) {} constexpr char IsMonotonicOptions::kTypeName[]; namespace internal { diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 1355eeb0dca..4b22b1e9580 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -194,21 +194,25 @@ class ARROW_EXPORT IsMonotonicOptions : public FunctionOptions { enum NullHandling { /// Ignore nulls. IGNORE, - // Use min value of element type as the value of nulls. + /// Use min value of element type as the value of nulls. MIN, - // Use max value of element type as the value of nulls. + /// Use max value of element type as the value of nulls. MAX, }; explicit IsMonotonicOptions(NullHandling null_handling = IGNORE, - EqualOptions equal_options = EqualOptions::Defaults()); + bool floating_approximate = false, + double epsilon = kDefaultAbsoluteTolerance); constexpr static char const kTypeName[] = "IsMonotonicOptions"; static IsMonotonicOptions Defaults() { return IsMonotonicOptions(); } - // Define how nulls are handled. - NullHandling null_handling; - // Options for equality comparisons. Used for floats. - EqualOptions equal_options; + /// Define how nulls are handled. + NullHandling null_handling = IGNORE; + /// Whether or not to use approximate floating point number comparisons. + bool floating_approximate = false; + /// Epsilon (error bound) value used when approximately comparing floating points + /// numbers. + double epsilon = kDefaultAbsoluteTolerance; }; /// @} diff --git a/cpp/src/arrow/compute/function_internal.h b/cpp/src/arrow/compute/function_internal.h index 1e2c951f6e6..0395ab3e9fb 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -205,8 +205,6 @@ static inline std::string GenericToString(const std::vector& value) { return ss.str(); } -static inline std::string GenericToString(EqualOptions value) { return value.ToString(); } - template static inline bool GenericEquals(const T& left, const T& right) { return left == right; @@ -368,12 +366,6 @@ static inline Result> GenericToScalar(const Datum& value } } -static inline Result> GenericToScalar(const EqualOptions& value) { - ARROW_ASSIGN_OR_RAISE(auto nans_equal, GenericToScalar(value.nans_equal())); - ARROW_ASSIGN_OR_RAISE(auto atol, GenericToScalar(value.atol())); - return StructScalar::Make({nans_equal, atol}, {"nans_equal", "atol"}); -} - template static inline enable_if_primitive_ctype::ArrowType, Result> GenericFromScalar(const std::shared_ptr& value) { @@ -494,21 +486,6 @@ GenericFromScalar(const std::shared_ptr& value) { return result; } -template -static inline enable_if_same_result GenericFromScalar( - const std::shared_ptr& value) { - if (value->type->id() != Type::STRUCT) { - return Status::Invalid("Expected type STRUCT but got ", value->type->id()); - } - if (!value->is_valid) return Status::Invalid("Got null scalar"); - const auto& holder = checked_cast(*value); - ARROW_ASSIGN_OR_RAISE(auto nans_equal_holder, holder.field("nans_equal")); - ARROW_ASSIGN_OR_RAISE(auto atol_holder, holder.field("atol")); - ARROW_ASSIGN_OR_RAISE(auto nans_equal, GenericFromScalar(nans_equal_holder)); - ARROW_ASSIGN_OR_RAISE(auto atol, GenericFromScalar(atol_holder)); - return EqualOptions().nans_equal(nans_equal).atol(atol); -} - template struct StringifyImpl { template diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc index 1a04d07f0aa..f6664f86caf 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc @@ -43,14 +43,100 @@ Status IsMonotonicOutput(bool increasing, bool strictly_increasing, bool decreas return Status::OK(); } -template +template +enable_if_floating_point IsMonotonicCheck( + const typename DataType::c_type& current, const typename DataType::c_type& next, + bool* increasing, bool* strictly_increasing, bool* decreasing, + bool* strictly_decreasing, const IsMonotonicOptions& options) { + // Short circuit for NaNs. + // https://en.wikipedia.org/wiki/NaN#Comparison_with_NaN + if (std::isnan(current) || std::isnan(next)) { + *increasing = false; + *strictly_increasing = false; + *decreasing = false; + *strictly_decreasing = false; + } else { + if (*increasing || *decreasing) { + bool equal = + // Equal if exactly equal. + current == next || + // Or if approximately equal within some error bound (epsilon). + (options.floating_approximate && + (fabs(current - next) <= + static_cast(options.epsilon))); + if (*increasing) { + if (!(equal || next > current)) { + *increasing = false; + *strictly_increasing = false; + } + } + if (*decreasing) { + if (!(equal || next < current)) { + *decreasing = false; + *strictly_decreasing = false; + } + } + } + if (*strictly_increasing) { + if (!(next > current)) { + *strictly_increasing = false; + } + } + if (*strictly_decreasing) { + if (!(next < current)) { + *strictly_decreasing = false; + } + } + } +} + +template +enable_if_not_floating_point IsMonotonicCheck( + const typename DataType::c_type& current, const typename DataType::c_type& next, + bool* increasing, bool* strictly_increasing, bool* decreasing, + bool* strictly_decreasing, const IsMonotonicOptions& options) { + if (*increasing) { + if (!(next >= current)) { + *increasing = false; + *strictly_increasing = false; + } + } + if (*strictly_increasing) { + if (!(next > current)) { + *strictly_increasing = false; + } + } + if (*decreasing) { + if (!(next <= current)) { + *decreasing = false; + *strictly_decreasing = false; + } + } + if (*strictly_decreasing) { + if (!(next < current)) { + *strictly_decreasing = false; + } + } +} + +template +enable_if_floating_point isnan( + const util::optional& opt) { + return opt.has_value() && std::isnan(opt.value()); +} + +template +enable_if_not_floating_point isnan( + const util::optional& opt) { + return false; +} + +template Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { - using ArrayType = typename TypeTraits::ArrayType; - using CType = typename TypeTraits::CType; + using ArrayType = typename TypeTraits::ArrayType; + using CType = typename TypeTraits::CType; - IsMonotonicOptions::NullHandling null_handling = - IsMonotonicState::Get(ctx).null_handling; - EqualOptions equal_options = IsMonotonicState::Get(ctx).equal_options; + auto options = IsMonotonicState::Get(ctx); // Check batch size if (batch.values.size() != 1) { @@ -71,18 +157,22 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { const std::shared_ptr& array_data = input.array(); ArrayType array(array_data); - // Return early if there are zero elements or one element in the array. + // Return early if there are NaNs, zero elements or one element in the array. // And return early if there are only nulls. if (array.length() <= 1 || array.null_count() == array.length()) { - // It is strictly increasing if there are zero or one elements or when nulls are - // ignored. - bool strictly = - array.length() <= 1 || null_handling == IsMonotonicOptions::NullHandling::IGNORE; - return IsMonotonicOutput(true, strictly, true, strictly, out); + if (std::any_of(array.begin(), array.end(), isnan)) { + return IsMonotonicOutput(false, false, false, false, out); + } else { + // It is strictly increasing if there are zero or one elements or when nulls are + // ignored. + bool strictly = array.length() <= 1 || + options.null_handling == IsMonotonicOptions::NullHandling::IGNORE; + return IsMonotonicOutput(true, strictly, true, strictly, out); + } } // Set null value based on option. - const CType null_value = null_handling == IsMonotonicOptions::NullHandling::MIN + const CType null_value = options.null_handling == IsMonotonicOptions::NullHandling::MIN ? std::numeric_limits::min() : std::numeric_limits::max(); @@ -96,7 +186,7 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { auto next = *b; // Handle nulls. - if (null_handling == IsMonotonicOptions::NullHandling::IGNORE) { + if (options.null_handling == IsMonotonicOptions::NullHandling::IGNORE) { // Forward both iterators to search for a non-null value. The loop exit // condition prevents reading past the end. if (!current.has_value()) { @@ -104,8 +194,8 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { ++b; continue; } - // Once we have a value for current we should also make sure that next has a value. - // The loop exit condition prevents reading past the end. + // Once we have a value for current we should also make sure that next has a + // value. The loop exit condition prevents reading past the end. if (!next.has_value()) { ++b; continue; @@ -116,31 +206,9 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { next = next.value_or(null_value); } - if (increasing) { - if (!(next >= current)) { - increasing = false; - // Can't be strictly increasing if not increasing. - strictly_increasing = false; - } - } - if (strictly_increasing) { - if (!(next > current)) { - strictly_increasing = false; - } - } - if (decreasing) { - if (!(next <= current)) { - decreasing = false; - // Can't be strictly decreasing if not decreasing. - strictly_decreasing = false; - } - } - // Check if strictly decreasing. - if (strictly_decreasing) { - if (!(next < current)) { - strictly_decreasing = false; - } - } + IsMonotonicCheck(current.value(), next.value(), &increasing, + &strictly_increasing, &decreasing, &strictly_decreasing, + options); // Early exit if all failed: if (!increasing && !strictly_increasing && !decreasing && !strictly_decreasing) { diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc index 14724a3186f..cfaf90c4082 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc @@ -158,8 +158,19 @@ TEST(TestIsMonotonicKernel, VectorFunction) { // Boolean Check(ArrayFromJSON(boolean(), "[true, true, false]"), false, false, true, false); Check(ArrayFromJSON(boolean(), "[true, false]"), false, false, true, true); + // Floating point - // todo(mb) + const IsMonotonicOptions approx(IsMonotonicOptions::NullHandling::IGNORE, true); + const IsMonotonicOptions approx_large(IsMonotonicOptions::NullHandling::IGNORE, true, + 1); + + Check(ArrayFromJSON(float32(), "[NaN]"), false, false, false, false); + Check(ArrayFromJSON(float32(), "[NaN, NaN]"), false, false, false, false); + Check(ArrayFromJSON(float32(), "[NaN, NaN, NaN]"), false, false, false, false); + + Check(ArrayFromJSON(float32(), "[1, 2, 3, 4]"), true, true, false, false); + Check(ArrayFromJSON(float64(), "[1, 2, 3, 4]"), true, true, false, false); + // Temporal Check(ArrayFromJSON(date32(), "[1, 2, 3, 4, 4]"), true, false, false, false); Check(ArrayFromJSON(date64(), "[0, 0, 3, 4, 4]"), true, false, false, false); diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h index 4b4cb5d15d3..6852fd54d47 100644 --- a/cpp/src/arrow/type_traits.h +++ b/cpp/src/arrow/type_traits.h @@ -542,6 +542,9 @@ using is_floating_type = std::is_base_of; template using enable_if_floating_point = enable_if_t::value, R>; +template +using enable_if_not_floating_point = enable_if_t::value, R>; + // Half floats are special in that they behave physically like an unsigned // integer. template From 841829c4e07a717ef636d25bc314e3bdab710161 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Wed, 15 Dec 2021 10:38:13 +0100 Subject: [PATCH 6/9] Add tests for floating point numbers --- cpp/src/arrow/compute/api_vector.cc | 3 +- cpp/src/arrow/compute/api_vector.h | 4 +- .../compute/kernels/vector_is_monotonic.cc | 62 ++++++++++++------- .../kernels/vector_is_monotonic_test.cc | 39 ++++++++++-- 4 files changed, 78 insertions(+), 30 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 6cf39dc765d..111797c44af 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -201,8 +201,7 @@ SelectKOptions::SelectKOptions(int64_t k, std::vector sort_keys) constexpr char SelectKOptions::kTypeName[]; IsMonotonicOptions::IsMonotonicOptions(IsMonotonicOptions::NullHandling null_handling, - bool floating_approximate, - double epsilon) + bool floating_approximate, double epsilon) : FunctionOptions(internal::kIsMonotonicOptionsType), null_handling(null_handling), floating_approximate(floating_approximate), diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 4b22b1e9580..1a73b2871ed 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -195,9 +195,11 @@ class ARROW_EXPORT IsMonotonicOptions : public FunctionOptions { /// Ignore nulls. IGNORE, /// Use min value of element type as the value of nulls. + /// -Inf for floating point numbers. MIN, /// Use max value of element type as the value of nulls. - MAX, + /// Inf for floating point numbers. + MAX }; explicit IsMonotonicOptions(NullHandling null_handling = IGNORE, diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc index f6664f86caf..40a3ea3f7f5 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc @@ -56,34 +56,32 @@ enable_if_floating_point IsMonotonicCheck( *decreasing = false; *strictly_decreasing = false; } else { - if (*increasing || *decreasing) { - bool equal = - // Equal if exactly equal. - current == next || - // Or if approximately equal within some error bound (epsilon). - (options.floating_approximate && - (fabs(current - next) <= - static_cast(options.epsilon))); - if (*increasing) { - if (!(equal || next > current)) { - *increasing = false; - *strictly_increasing = false; - } + bool equal = + // Approximately equal within some error bound (epsilon). + (options.floating_approximate && + (fabs(current - next) <= + static_cast(options.epsilon))) || + // Or exactly equal. + current == next; + if (*increasing) { + if (!(equal || next > current)) { + *increasing = false; + *strictly_increasing = false; } - if (*decreasing) { - if (!(equal || next < current)) { - *decreasing = false; - *strictly_decreasing = false; - } + } + if (*decreasing) { + if (!(equal || next < current)) { + *decreasing = false; + *strictly_decreasing = false; } } if (*strictly_increasing) { - if (!(next > current)) { + if (equal || !(next > current)) { *strictly_increasing = false; } } if (*strictly_decreasing) { - if (!(next < current)) { + if (equal || !(next < current)) { *strictly_decreasing = false; } } @@ -131,6 +129,26 @@ enable_if_not_floating_point isnan( return false; } +template +constexpr enable_if_floating_point min() { + return -std::numeric_limits::infinity(); +} + +template +constexpr enable_if_floating_point max() { + return std::numeric_limits::infinity(); +} + +template +constexpr enable_if_not_floating_point min() { + return std::numeric_limits::min(); +} + +template +constexpr enable_if_not_floating_point max() { + return std::numeric_limits::max(); +} + template Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { using ArrayType = typename TypeTraits::ArrayType; @@ -173,8 +191,8 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { // Set null value based on option. const CType null_value = options.null_handling == IsMonotonicOptions::NullHandling::MIN - ? std::numeric_limits::min() - : std::numeric_limits::max(); + ? min() + : max(); bool increasing = true, strictly_increasing = true, decreasing = true, strictly_decreasing = true; diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc index cfaf90c4082..3ecc9de36e8 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc @@ -17,7 +17,7 @@ #include -#include "arrow/compute/api_vector.cc" +#include "arrow/compute/api_vector.h" #include "arrow/compute/exec.h" #include "arrow/compute/kernels/test_util.h" #include "arrow/datum.h" @@ -160,16 +160,45 @@ TEST(TestIsMonotonicKernel, VectorFunction) { Check(ArrayFromJSON(boolean(), "[true, false]"), false, false, true, true); // Floating point - const IsMonotonicOptions approx(IsMonotonicOptions::NullHandling::IGNORE, true); - const IsMonotonicOptions approx_large(IsMonotonicOptions::NullHandling::IGNORE, true, - 1); + const IsMonotonicOptions approx(IsMonotonicOptions::NullHandling::IGNORE, true, 1e-1); Check(ArrayFromJSON(float32(), "[NaN]"), false, false, false, false); Check(ArrayFromJSON(float32(), "[NaN, NaN]"), false, false, false, false); Check(ArrayFromJSON(float32(), "[NaN, NaN, NaN]"), false, false, false, false); + Check(ArrayFromJSON(float32(), "[NaN, 1, 2, 3]"), false, false, false, false); + + Check(ArrayFromJSON(float32(), "[-Inf, 0, Inf]"), true, true, false, false); + Check(ArrayFromJSON(float32(), "[-Inf, -Inf, Inf]"), true, false, false, false); + Check(ArrayFromJSON(float32(), "[Inf, 0, -Inf]"), false, false, true, true); + Check(ArrayFromJSON(float32(), "[Inf, Inf, -Inf]"), false, false, true, false); + + Check(ArrayFromJSON(float64(), "[-Inf, Inf, null]"), false, false, false, false, min); + Check(ArrayFromJSON(float64(), "[-Inf, Inf, null]"), true, false, false, false, max); + Check(ArrayFromJSON(float64(), "[Inf, -Inf, null]"), false, false, true, false, min); + Check(ArrayFromJSON(float64(), "[Inf, -Inf, null]"), false, false, false, false, max); + + Check(ArrayFromJSON(float32(), "[-Inf, null, Inf]"), true, false, false, false, min); + Check(ArrayFromJSON(float32(), "[-Inf, null, Inf]"), true, false, false, false, max); + Check(ArrayFromJSON(float32(), "[Inf, null, -Inf]"), false, false, true, false, min); + Check(ArrayFromJSON(float32(), "[Inf, null, -Inf]"), false, false, true, false, max); + + Check(ArrayFromJSON(float32(), "[-Inf, 0, null, Inf]"), false, false, false, false, + min); + Check(ArrayFromJSON(float32(), "[-Inf, 0, null, Inf]"), true, false, false, false, max); + Check(ArrayFromJSON(float32(), "[Inf, 0, null, -Inf]"), false, false, true, false, min); + Check(ArrayFromJSON(float32(), "[Inf, 0, null, -Inf]"), false, false, false, false, + max); + + Check(ArrayFromJSON(float32(), "[1, 1.01, 1.02, 1.03, 1.04]"), true, true, false, + false); + Check(ArrayFromJSON(float32(), "[1, 1.01, 1.02, 1.03, 1.04]"), true, false, true, false, + approx); + Check(ArrayFromJSON(float32(), "[1, 1.01, 1.02, 1.03, 2]"), true, true, false, false); + Check(ArrayFromJSON(float32(), "[1, 1.01, 1.02, 1.03, 2]"), true, false, false, false, + approx); Check(ArrayFromJSON(float32(), "[1, 2, 3, 4]"), true, true, false, false); - Check(ArrayFromJSON(float64(), "[1, 2, 3, 4]"), true, true, false, false); + Check(ArrayFromJSON(float64(), "[4, 3, 2, 1]"), false, false, true, true); // Temporal Check(ArrayFromJSON(date32(), "[1, 2, 3, 4, 4]"), true, false, false, false); From 275dfe852e33c9f5fc63c57fded457b2fbf8f3fb Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Wed, 15 Dec 2021 13:47:34 +0100 Subject: [PATCH 7/9] Rename NullHandling variants --- cpp/src/arrow/compute/api_vector.cc | 19 ++++++++++--------- cpp/src/arrow/compute/api_vector.h | 10 +++++----- .../compute/kernels/vector_is_monotonic.cc | 14 ++++++++------ .../kernels/vector_is_monotonic_test.cc | 7 ++++--- 4 files changed, 27 insertions(+), 23 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index 111797c44af..363c764ade1 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -91,18 +91,19 @@ struct EnumTraits }; template <> struct EnumTraits - : BasicEnumTraits< - IsMonotonicOptions::NullHandling, IsMonotonicOptions::NullHandling::IGNORE, - IsMonotonicOptions::NullHandling::MIN, IsMonotonicOptions::NullHandling::MAX> { + : BasicEnumTraits { static std::string name() { return "IsMonotonicOptions::NullHandling"; } static std::string value_name(IsMonotonicOptions::NullHandling value) { switch (value) { - case IsMonotonicOptions::NullHandling::IGNORE: - return "IGNORE"; - case IsMonotonicOptions::NullHandling::MIN: - return "MIN"; - case IsMonotonicOptions::NullHandling::MAX: - return "MAX"; + case IsMonotonicOptions::NullHandling::IGNORE_NULLS: + return "IGNORE_NULLS"; + case IsMonotonicOptions::NullHandling::USE_MIN_VALUE: + return "USE_MIN_VALUE"; + case IsMonotonicOptions::NullHandling::USE_MAX_VALUE: + return "USE_MAX_VALUE"; } return ""; } diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 1a73b2871ed..dcece04ab99 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -193,23 +193,23 @@ class ARROW_EXPORT IsMonotonicOptions : public FunctionOptions { public: enum NullHandling { /// Ignore nulls. - IGNORE, + IGNORE_NULLS, /// Use min value of element type as the value of nulls. /// -Inf for floating point numbers. - MIN, + USE_MIN_VALUE, /// Use max value of element type as the value of nulls. /// Inf for floating point numbers. - MAX + USE_MAX_VALUE }; - explicit IsMonotonicOptions(NullHandling null_handling = IGNORE, + explicit IsMonotonicOptions(NullHandling null_handling = IGNORE_NULLS, bool floating_approximate = false, double epsilon = kDefaultAbsoluteTolerance); constexpr static char const kTypeName[] = "IsMonotonicOptions"; static IsMonotonicOptions Defaults() { return IsMonotonicOptions(); } /// Define how nulls are handled. - NullHandling null_handling = IGNORE; + NullHandling null_handling = IGNORE_NULLS; /// Whether or not to use approximate floating point number comparisons. bool floating_approximate = false; /// Epsilon (error bound) value used when approximately comparing floating points diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc index 40a3ea3f7f5..e2a6249431f 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc @@ -183,16 +183,18 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { } else { // It is strictly increasing if there are zero or one elements or when nulls are // ignored. - bool strictly = array.length() <= 1 || - options.null_handling == IsMonotonicOptions::NullHandling::IGNORE; + bool strictly = + array.length() <= 1 || + options.null_handling == IsMonotonicOptions::NullHandling::IGNORE_NULLS; return IsMonotonicOutput(true, strictly, true, strictly, out); } } // Set null value based on option. - const CType null_value = options.null_handling == IsMonotonicOptions::NullHandling::MIN - ? min() - : max(); + const CType null_value = + options.null_handling == IsMonotonicOptions::NullHandling::USE_MIN_VALUE + ? min() + : max(); bool increasing = true, strictly_increasing = true, decreasing = true, strictly_decreasing = true; @@ -204,7 +206,7 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { auto next = *b; // Handle nulls. - if (options.null_handling == IsMonotonicOptions::NullHandling::IGNORE) { + if (options.null_handling == IsMonotonicOptions::NullHandling::IGNORE_NULLS) { // Forward both iterators to search for a non-null value. The loop exit // condition prevents reading past the end. if (!current.has_value()) { diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc index 3ecc9de36e8..68640dd74a2 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc @@ -46,8 +46,8 @@ void Check(Datum input, bool increasing, bool strictly_increasing, bool decreasi } TEST(TestIsMonotonicKernel, VectorFunction) { - const IsMonotonicOptions min(IsMonotonicOptions::NullHandling::MIN); - const IsMonotonicOptions max(IsMonotonicOptions::NullHandling::MAX); + const IsMonotonicOptions min(IsMonotonicOptions::NullHandling::USE_MIN_VALUE); + const IsMonotonicOptions max(IsMonotonicOptions::NullHandling::USE_MAX_VALUE); // Primitive arrays // These tests should early exit (based on length). @@ -160,7 +160,8 @@ TEST(TestIsMonotonicKernel, VectorFunction) { Check(ArrayFromJSON(boolean(), "[true, false]"), false, false, true, true); // Floating point - const IsMonotonicOptions approx(IsMonotonicOptions::NullHandling::IGNORE, true, 1e-1); + const IsMonotonicOptions approx(IsMonotonicOptions::NullHandling::IGNORE_NULLS, true, + 1e-1); Check(ArrayFromJSON(float32(), "[NaN]"), false, false, false, false); Check(ArrayFromJSON(float32(), "[NaN, NaN]"), false, false, false, false); From 6816819db7ab25069ffe0e443f5e8e505c96e9d7 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Thu, 16 Dec 2021 10:02:10 +0100 Subject: [PATCH 8/9] Add tests for supported temporal types --- .../compute/kernels/vector_is_monotonic.cc | 12 ++++++---- .../kernels/vector_is_monotonic_test.cc | 23 +++++++++++++++++-- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc index e2a6249431f..1391d3ce931 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc @@ -269,8 +269,8 @@ Status AddIsMonotonicKernel(VectorFunction* func) { VectorKernel is_monotonic_base; is_monotonic_base.init = IsMonotonicState::Init; is_monotonic_base.can_execute_chunkwise = false; - is_monotonic_base.signature = KernelSignature::Make( - {InputType::Array(TypeTraits::type_singleton())}, output_type); + is_monotonic_base.signature = + KernelSignature::Make({InputType::Array(Type::type_id)}, output_type); is_monotonic_base.exec = IsMonotonic; return func->AddKernel(is_monotonic_base); } @@ -297,14 +297,16 @@ void RegisterVectorIsMonotonic(FunctionRegistry* registry) { DCHECK_OK(AddIsMonotonicKernel(func.get())); DCHECK_OK(AddIsMonotonicKernel(func.get())); - // Temportal types - // DCHECK_OK(AddIsMonotonicKernel(func.get())); - // DCHECK_OK(AddIsMonotonicKernel(func.get())); + // Temporal types + DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); DCHECK_OK(AddIsMonotonicKernel(func.get())); DCHECK_OK(AddIsMonotonicKernel(func.get())); // DCHECK_OK(AddIsMonotonicKernel(func.get())); // DCHECK_OK(AddIsMonotonicKernel(func.get())); DCHECK_OK(AddIsMonotonicKernel(func.get())); + DCHECK_OK(AddIsMonotonicKernel(func.get())); // Decimal types // DCHECK_OK(AddIsMonotonicKernel(func.get())); diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc index 68640dd74a2..ab0095d3359 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc @@ -202,8 +202,27 @@ TEST(TestIsMonotonicKernel, VectorFunction) { Check(ArrayFromJSON(float64(), "[4, 3, 2, 1]"), false, false, true, true); // Temporal - Check(ArrayFromJSON(date32(), "[1, 2, 3, 4, 4]"), true, false, false, false); - Check(ArrayFromJSON(date64(), "[0, 0, 3, 4, 4]"), true, false, false, false); + Check(ArrayFromJSON(time32(TimeUnit::SECOND), "[1, 2, 3, 4, 5]"), true, true, false, + false); + Check(ArrayFromJSON(time64(TimeUnit::NANO), "[5, 4, 4, 2, 1]"), false, false, true, + false); + Check(ArrayFromJSON(timestamp(TimeUnit::SECOND), + R"(["1970-01-01","2000-02-29","1900-02-28"])"), + false, false, false, false); + Check(ArrayFromJSON(timestamp(TimeUnit::MILLI, "UTC"), + R"(["1970-01-01","1971-01-01","1972-01-01"])"), + true, true, false, false); + Check(ArrayFromJSON(date32(), "[1, 2, 3, 4, null, 5]"), true, true, false, false); + Check(ArrayFromJSON(date64(), "[1, 2, 3, 4, null, 5]"), false, false, false, false, + max); + Check(ArrayFromJSON(month_interval(), "[1, 2, 3, 4, null, 5]"), true, true, false, + false); + Check(ArrayFromJSON(month_interval(), "[1, 2, 3, 4, null]"), true, true, false, false, + max); + Check(ArrayFromJSON(duration(TimeUnit::SECOND), "[1, 2, 3, 4, 5]"), true, true, false, + false); + Check(ArrayFromJSON(duration(TimeUnit::NANO), "[5, 4, 4, 2, 1]"), false, false, true, + false); } } // namespace compute From 4284fdaa573396d9db63a1cf2c01d4a513b62425 Mon Sep 17 00:00:00 2001 From: Matthijs Brobbel Date: Fri, 17 Dec 2021 11:10:58 +0100 Subject: [PATCH 9/9] Fix early exit comment and improve null handling --- cpp/src/arrow/compute/kernels/vector_is_monotonic.cc | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc index 1391d3ce931..801ed0a9bb1 100644 --- a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc +++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc @@ -175,7 +175,7 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { const std::shared_ptr& array_data = input.array(); ArrayType array(array_data); - // Return early if there are NaNs, zero elements or one element in the array. + // Return early if there are zero elements or one element in the array. // And return early if there are only nulls. if (array.length() <= 1 || array.null_count() == array.length()) { if (std::any_of(array.begin(), array.end(), isnan)) { @@ -220,15 +220,11 @@ Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) { ++b; continue; } - } else { - // Based on the function options set null values to min/max. - current = current.value_or(null_value); - next = next.value_or(null_value); } - IsMonotonicCheck(current.value(), next.value(), &increasing, - &strictly_increasing, &decreasing, &strictly_decreasing, - options); + IsMonotonicCheck(current.value_or(null_value), next.value_or(null_value), + &increasing, &strictly_increasing, &decreasing, + &strictly_decreasing, options); // Early exit if all failed: if (!increasing && !strictly_increasing && !decreasing && !strictly_decreasing) {