diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 5736c557bd0..5e0028cd5db 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -428,6 +428,7 @@ if(ARROW_COMPUTE)
        compute/kernels/util_internal.cc
        compute/kernels/vector_array_sort.cc
        compute/kernels/vector_hash.cc
+       compute/kernels/vector_is_monotonic.cc
        compute/kernels/vector_nested.cc
        compute/kernels/vector_replace.cc
        compute/kernels/vector_selection.cc
diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc
index 95114d8d8a5..363c764ade1 100644
--- a/cpp/src/arrow/compute/api_vector.cc
+++ b/cpp/src/arrow/compute/api_vector.cc
@@ -42,6 +42,7 @@ namespace internal {
 using compute::DictionaryEncodeOptions;
 using compute::FilterOptions;
+using compute::IsMonotonicOptions;
 using compute::NullPlacement;
 
 template <>
@@ -88,6 +89,25 @@ struct EnumTraits
     return "";
   }
 };
+template <>
+struct EnumTraits
+    : BasicEnumTraits {
+  static std::string name() { return "IsMonotonicOptions::NullHandling"; }
+  static std::string value_name(IsMonotonicOptions::NullHandling value) {
+    switch (value) {
+      case IsMonotonicOptions::NullHandling::IGNORE_NULLS:
+        return "IGNORE_NULLS";
+      case IsMonotonicOptions::NullHandling::USE_MIN_VALUE:
+        return "USE_MIN_VALUE";
+      case IsMonotonicOptions::NullHandling::USE_MAX_VALUE:
+        return "USE_MAX_VALUE";
+    }
+    return "";
+  }
+};
 
 }  // namespace internal
 
@@ -135,6 +155,11 @@ static auto kPartitionNthOptionsType = GetFunctionOptionsType(
     DataMember("k", &SelectKOptions::k),
     DataMember("sort_keys", &SelectKOptions::sort_keys));
 
+static auto kIsMonotonicOptionsType = GetFunctionOptionsType(
+    DataMember("null_handling", &IsMonotonicOptions::null_handling),
+    DataMember("floating_approximate", &IsMonotonicOptions::floating_approximate),
+    DataMember("epsilon", &IsMonotonicOptions::epsilon));
+
 }  // namespace
 }  // namespace internal
 
@@ -176,6 +201,14 @@ SelectKOptions::SelectKOptions(int64_t k, std::vector sort_keys)
       sort_keys(std::move(sort_keys)) {}
 constexpr char SelectKOptions::kTypeName[];
 
+IsMonotonicOptions::IsMonotonicOptions(IsMonotonicOptions::NullHandling null_handling,
+                                       bool floating_approximate, double epsilon)
+    : FunctionOptions(internal::kIsMonotonicOptionsType),
+      null_handling(null_handling),
+      floating_approximate(floating_approximate),
+      epsilon(epsilon) {}
+constexpr char IsMonotonicOptions::kTypeName[];
+
 namespace internal {
 void RegisterVectorOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType));
@@ -185,6 +218,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) {
   DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType));
   DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType));
+  DCHECK_OK(registry->AddFunctionOptionsType(kIsMonotonicOptionsType));
 }
 }  // namespace internal
 
@@ -280,6 +314,11 @@ Result> ValueCounts(const Datum& value, ExecContext
   return checked_pointer_cast(result.make_array());
 }
 
+Result IsMonotonic(const Datum& value, const IsMonotonicOptions& options,
+                   ExecContext* ctx) {
+  return CallFunction("is_monotonic", {value}, &options, ctx);
+}
+
 // ----------------------------------------------------------------------
 // Filter- and take-related selection functions
 
diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h
index 8788d5d160e..dcece04ab99 100644
--- a/cpp/src/arrow/compute/api_vector.h
+++ b/cpp/src/arrow/compute/api_vector.h
@@ -188,6 +188,35 @@ class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
   NullPlacement null_placement;
 };
 
+/// \brief Options for IsMonotonic: null handling and floating point tolerance
+class ARROW_EXPORT IsMonotonicOptions : public FunctionOptions {
+ public:
+  enum NullHandling {
+    /// Skip nulls; only non-null values are compared with each other.
+    IGNORE_NULLS,
+    /// Treat every null as the minimum value of the element type
+    /// (-Inf for floating point numbers).
+    USE_MIN_VALUE,
+    /// Treat every null as the maximum value of the element type
+    /// (Inf for floating point numbers).
+    USE_MAX_VALUE
+  };
+
+  explicit IsMonotonicOptions(NullHandling null_handling = IGNORE_NULLS,
+                              bool floating_approximate = false,
+                              double epsilon = kDefaultAbsoluteTolerance);
+  constexpr static char const kTypeName[] = "IsMonotonicOptions";
+  static IsMonotonicOptions Defaults() { return IsMonotonicOptions(); }
+
+  /// How nulls are treated when neighbouring values are compared.
+  NullHandling null_handling = IGNORE_NULLS;
+  /// If true, floating point values within `epsilon` of each other compare as equal.
+  bool floating_approximate = false;
+  /// Absolute error bound for approximate floating point comparisons; only used
+  /// when `floating_approximate` is true.
+  double epsilon = kDefaultAbsoluteTolerance;
+};
+
 /// @}
 
 /// \brief Filter with a boolean selection filter
@@ -494,6 +523,28 @@ Result DictionaryEncode(
     const DictionaryEncodeOptions& options = DictionaryEncodeOptions::Defaults(),
     ExecContext* ctx = NULLPTR);
 
+/// \brief Returns information about the monotonicity of the elements in an
+/// array with well-ordered elements.
+///
+/// Returns a struct scalar with type
+///   struct<
+///     increasing: boolean,
+///     strictly_increasing: boolean,
+///     decreasing: boolean,
+///     strictly_decreasing: boolean
+///   >
+///
+/// \param[in] data input array (non-array datums are rejected by the kernel).
+/// \param[in] options see IsMonotonicOptions for more information.
+/// \param[in] ctx the function execution context, optional.
+/// \return resulting datum as a struct scalar.
+///
+/// \since x.0.0 \note API not yet finalized
+ARROW_EXPORT
+Result<Datum> IsMonotonic(
+    const Datum& data, const IsMonotonicOptions& options = IsMonotonicOptions::Defaults(),
+    ExecContext* ctx = NULLPTR);
+
 // ----------------------------------------------------------------------
 // Deprecated functions
 
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 28686a9cafa..6e0f2b826b4 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -46,6 +46,7 @@ add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute")
 add_arrow_compute_test(vector_test
                        SOURCES
                        vector_hash_test.cc
+                       vector_is_monotonic_test.cc
                        vector_nested_test.cc
                        vector_replace_test.cc
                        vector_selection_test.cc
diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc
new file mode 100644
index 00000000000..801ed0a9bb1
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic.cc
@@ -0,0 +1,341 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <cmath>
+#include <limits>
+#include <memory>
+
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/kernels/aggregate_internal.h"
+#include "arrow/compute/registry.h"
+#include "arrow/util/optional.h"
+
+namespace arrow {
+namespace compute {
+namespace internal {
+
+namespace {
+// ----------------------------------------------------------------------
+// IsMonotonic implementation
+
+using IsMonotonicState = OptionsWrapper<IsMonotonicOptions>;
+
+// Packs the four monotonicity flags into the struct scalar returned by the kernel.
+Status IsMonotonicOutput(bool increasing, bool strictly_increasing, bool decreasing,
+                         bool strictly_decreasing, Datum* out) {
+  ARROW_ASSIGN_OR_RAISE(
+      *out, StructScalar::Make({std::make_shared<BooleanScalar>(increasing),
+                                std::make_shared<BooleanScalar>(strictly_increasing),
+                                std::make_shared<BooleanScalar>(decreasing),
+                                std::make_shared<BooleanScalar>(strictly_decreasing)},
+                               {"increasing", "strictly_increasing", "decreasing",
+                                "strictly_decreasing"}));
+  return Status::OK();
+}
+
+// Folds the comparison of one (current, next) pair into the four flags.
+// Floating point overload: a NaN on either side clears every flag, and when
+// options.floating_approximate is set, values within options.epsilon of each
+// other count as equal.
+template <typename DataType>
+enable_if_floating_point<DataType> IsMonotonicCheck(
+    const typename DataType::c_type& current, const typename DataType::c_type& next,
+    bool* increasing, bool* strictly_increasing, bool* decreasing,
+    bool* strictly_decreasing, const IsMonotonicOptions& options) {
+  // Short circuit for NaNs.
+  // https://en.wikipedia.org/wiki/NaN#Comparison_with_NaN
+  if (std::isnan(current) || std::isnan(next)) {
+    *increasing = false;
+    *strictly_increasing = false;
+    *decreasing = false;
+    *strictly_decreasing = false;
+  } else {
+    bool equal =
+        // Approximately equal within some error bound (epsilon).
+        (options.floating_approximate &&
+         (std::fabs(current - next) <=
+          static_cast<typename DataType::c_type>(options.epsilon))) ||
+        // Or exactly equal.
+        current == next;
+    if (*increasing) {
+      if (!(equal || next > current)) {
+        *increasing = false;
+        *strictly_increasing = false;
+      }
+    }
+    if (*decreasing) {
+      if (!(equal || next < current)) {
+        *decreasing = false;
+        *strictly_decreasing = false;
+      }
+    }
+    if (*strictly_increasing) {
+      if (equal || !(next > current)) {
+        *strictly_increasing = false;
+      }
+    }
+    if (*strictly_decreasing) {
+      if (equal || !(next < current)) {
+        *strictly_decreasing = false;
+      }
+    }
+  }
+}
+
+// Folds the comparison of one (current, next) pair into the four flags.
+// Non floating point overload: comparisons are exact and options is unused.
+template <typename DataType>
+enable_if_not_floating_point<DataType> IsMonotonicCheck(
+    const typename DataType::c_type& current, const typename DataType::c_type& next,
+    bool* increasing, bool* strictly_increasing, bool* decreasing,
+    bool* strictly_decreasing, const IsMonotonicOptions& options) {
+  if (*increasing) {
+    if (!(next >= current)) {
+      *increasing = false;
+      *strictly_increasing = false;
+    }
+  }
+  if (*strictly_increasing) {
+    if (!(next > current)) {
+      *strictly_increasing = false;
+    }
+  }
+  if (*decreasing) {
+    if (!(next <= current)) {
+      *decreasing = false;
+      *strictly_decreasing = false;
+    }
+  }
+  if (*strictly_decreasing) {
+    if (!(next < current)) {
+      *strictly_decreasing = false;
+    }
+  }
+}
+
+// Returns true if opt holds a value and that value is NaN.
+template <typename DataType>
+enable_if_floating_point<DataType, bool> isnan(
+    const util::optional<typename DataType::c_type>& opt) {
+  return opt.has_value() && std::isnan(opt.value());
+}
+
+// Non floating point values are never NaN.
+template <typename DataType>
+enable_if_not_floating_point<DataType, bool> isnan(
+    const util::optional<typename DataType::c_type>& opt) {
+  return false;
+}
+
+// Stand-in for nulls under USE_MIN_VALUE (floating point types): -Inf.
+template <typename DataType>
+constexpr enable_if_floating_point<DataType, typename DataType::c_type> min() {
+  return -std::numeric_limits<typename DataType::c_type>::infinity();
+}
+
+// Stand-in for nulls under USE_MAX_VALUE (floating point types): +Inf.
+template <typename DataType>
+constexpr enable_if_floating_point<DataType, typename DataType::c_type> max() {
+  return std::numeric_limits<typename DataType::c_type>::infinity();
+}
+
+// Stand-in for nulls under USE_MIN_VALUE: smallest value of the C type.
+template <typename DataType>
+constexpr enable_if_not_floating_point<DataType, typename DataType::c_type> min() {
+  return std::numeric_limits<typename DataType::c_type>::min();
+}
+
+// Stand-in for nulls under USE_MAX_VALUE: largest value of the C type.
+template <typename DataType>
+constexpr enable_if_not_floating_point<DataType, typename DataType::c_type> max() {
+  return std::numeric_limits<typename DataType::c_type>::max();
+}
+
+// Kernel entry point: computes the increasing / strictly_increasing / decreasing /
+// strictly_decreasing flags for a single array and emits them as a struct scalar.
+template <typename Type>
+Status IsMonotonic(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+  using ArrayType = typename TypeTraits<Type>::ArrayType;
+  using CType = typename TypeTraits<Type>::CType;
+
+  const auto& options = IsMonotonicState::Get(ctx);
+
+  // Check batch size
+  if (batch.values.size() != 1) {
+    return Status::Invalid("IsMonotonic expects a single datum (array) as input");
+  }
+
+  // Safety:
+  // - Made sure there is at least one input datum.
+  const Datum& input = batch[0];
+
+  // Validate input datum type (useful for direct invocation only).
+  if (!input.is_array()) {
+    return Status::Invalid("IsMonotonic expects array datum as input");
+  }
+
+  // Safety:
+  // - Made sure that the input datum is an array.
+  const std::shared_ptr<ArrayData>& array_data = input.array();
+  ArrayType array(array_data);
+
+  // Return early if there are zero elements or one element in the array.
+  // And return early if there are only nulls.
+  if (array.length() <= 1 || array.null_count() == array.length()) {
+    if (std::any_of(array.begin(), array.end(), isnan<Type>)) {
+      return IsMonotonicOutput(false, false, false, false, out);
+    } else {
+      // It is strictly increasing if there are zero or one elements or when nulls are
+      // ignored.
+      bool strictly =
+          array.length() <= 1 ||
+          options.null_handling == IsMonotonicOptions::NullHandling::IGNORE_NULLS;
+      return IsMonotonicOutput(true, strictly, true, strictly, out);
+    }
+  }
+
+  // Set null value based on option.
+  const CType null_value =
+      options.null_handling == IsMonotonicOptions::NullHandling::USE_MIN_VALUE
+          ? min<Type>()
+          : max<Type>();
+
+  bool increasing = true, strictly_increasing = true, decreasing = true,
+       strictly_decreasing = true;
+
+  // Safety:
+  // - Made sure that the length is at least 2 above.
+  for (auto a = array.begin(), b = ++array.begin(); b != array.end();) {
+    auto current = *a;
+    auto next = *b;
+
+    // Handle nulls.
+    if (options.null_handling == IsMonotonicOptions::NullHandling::IGNORE_NULLS) {
+      // Forward both iterators to search for a non-null value. The loop exit
+      // condition prevents reading past the end.
+      if (!current.has_value()) {
+        ++a;
+        ++b;
+        continue;
+      }
+      // Once we have a value for current we should also make sure that next has a
+      // value. The loop exit condition prevents reading past the end.
+      if (!next.has_value()) {
+        ++b;
+        continue;
+      }
+    }
+
+    IsMonotonicCheck<Type>(current.value_or(null_value), next.value_or(null_value),
+                           &increasing, &strictly_increasing, &decreasing,
+                           &strictly_decreasing, options);
+
+    // Early exit if all failed:
+    if (!increasing && !strictly_increasing && !decreasing && !strictly_decreasing) {
+      break;
+    }
+    // Advance: `next` becomes `current`. Assigning b to a (rather than ++a) matters
+    // when nulls were skipped: b is then more than one element ahead of a, and ++a
+    // would land on a skipped null and silently drop the following comparison.
+    a = b;
+    ++b;
+  }
+
+  // Output
+  return IsMonotonicOutput(increasing, strictly_increasing, decreasing,
+                           strictly_decreasing, out);
+}
+
+}  // namespace
+
+const FunctionDoc is_monotonic_doc{
+    "Returns whether the array contains monotonically (strictly) "
+    "increasing/decreasing values",
+    ("Returns a StructScalar indicating whether the values in the array are \n"
+     "increasing, strictly increasing, decreasing and/or strictly decreasing.\n"
+     "Output type is struct<increasing: boolean, strictly_increasing: boolean, "
+     "decreasing: boolean, strictly_decreasing: boolean>.\n"
+     "Null values are ignored by default.\n"
+     "Implemented for arrays with well-ordered element types."),
+    {"array"},
+    "IsMonotonicOptions"};
+
+// Adds the is_monotonic kernel for arrays of the given Arrow type to func.
+template <typename Type>
+Status AddIsMonotonicKernel(VectorFunction* func) {
+  static const ValueDescr output_type = ValueDescr::Scalar(struct_({
+      field("increasing", boolean()),
+      field("strictly_increasing", boolean()),
+      field("decreasing", boolean()),
+      field("strictly_decreasing", boolean()),
+  }));
+  VectorKernel is_monotonic_base;
+  is_monotonic_base.init = IsMonotonicState::Init;
+  is_monotonic_base.can_execute_chunkwise = false;
+  is_monotonic_base.signature =
+      KernelSignature::Make({InputType::Array(Type::type_id)}, output_type);
+  is_monotonic_base.exec = IsMonotonic<Type>;
+  return func->AddKernel(is_monotonic_base);
+}
+
+// Builds the "is_monotonic" function with its per-type kernels and registers it.
+void RegisterVectorIsMonotonic(FunctionRegistry* registry) {
+  static const IsMonotonicOptions default_options;
+  auto func = std::make_shared<VectorFunction>("is_monotonic", Arity::Unary(),
+                                               &is_monotonic_doc, &default_options);
+
+  DCHECK_OK(AddIsMonotonicKernel<BooleanType>(func.get()));
+
+  // Signed and unsigned integer types
+  DCHECK_OK(AddIsMonotonicKernel<Int8Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<Int16Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<Int32Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<Int64Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<UInt8Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<UInt16Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<UInt32Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<UInt64Type>(func.get()));
+
+  // Floating point types
+  // DCHECK_OK(AddIsMonotonicKernel<HalfFloatType>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<FloatType>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<DoubleType>(func.get()));
+
+  // Temporal types
+  DCHECK_OK(AddIsMonotonicKernel<Date32Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<Date64Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<Time32Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<Time64Type>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<TimestampType>(func.get()));
+  // DCHECK_OK(AddIsMonotonicKernel<DayTimeIntervalType>(func.get()));
+  // DCHECK_OK(AddIsMonotonicKernel<MonthDayNanoIntervalType>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<MonthIntervalType>(func.get()));
+  DCHECK_OK(AddIsMonotonicKernel<DurationType>(func.get()));
+
+  // Decimal types
+  // DCHECK_OK(AddIsMonotonicKernel<Decimal128Type>(func.get()));
+  // DCHECK_OK(AddIsMonotonicKernel<Decimal256Type>(func.get()));
+
+  DCHECK_OK(registry->AddFunction(std::move(func)));
+}
+
+}  // namespace internal
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc
new file mode 100644
index 00000000000..ab0095d3359
--- /dev/null
+++ b/cpp/src/arrow/compute/kernels/vector_is_monotonic_test.cc
@@ -0,0 +1,238 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/exec.h"
+#include "arrow/compute/kernels/test_util.h"
+#include "arrow/datum.h"
+#include "arrow/scalar.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/testing/matchers.h"
+#include "arrow/type.h"
+
+namespace arrow {
+namespace compute {
+
+// Runs "is_monotonic" on input and compares the four flags of the resulting struct
+// scalar with the expected values.
+void Check(const Datum& input, bool increasing, bool strictly_increasing, bool decreasing,
+           bool strictly_decreasing,
+           const IsMonotonicOptions& options = IsMonotonicOptions::Defaults()) {
+  ASSERT_OK_AND_ASSIGN(Datum out, CallFunction("is_monotonic", {input}, &options));
+  const StructScalar& output = out.scalar_as<StructScalar>();
+
+  auto out_increasing = std::static_pointer_cast<BooleanScalar>(output.value[0]);
+  ASSERT_EQ(increasing, out_increasing->value);
+  auto out_strictly_increasing = std::static_pointer_cast<BooleanScalar>(output.value[1]);
+  ASSERT_EQ(strictly_increasing, out_strictly_increasing->value);
+  auto out_decreasing = std::static_pointer_cast<BooleanScalar>(output.value[2]);
+  ASSERT_EQ(decreasing, out_decreasing->value);
+  auto out_strictly_decreasing = std::static_pointer_cast<BooleanScalar>(output.value[3]);
+  ASSERT_EQ(strictly_decreasing, out_strictly_decreasing->value);
+}
+
+TEST(TestIsMonotonicKernel, VectorFunction) {
+  const IsMonotonicOptions min(IsMonotonicOptions::NullHandling::USE_MIN_VALUE);
+  const IsMonotonicOptions max(IsMonotonicOptions::NullHandling::USE_MAX_VALUE);
+
+  // Primitive arrays
+  // These tests should early exit (based on length).
+  Check(ArrayFromJSON(int8(), "[]"), true, true, true, true);
+  Check(ArrayFromJSON(int8(), "[null]"), true, true, true, true);
+  Check(ArrayFromJSON(int8(), "[null]"), true, true, true, true, min);
+  Check(ArrayFromJSON(int8(), "[null]"), true, true, true, true, max);
+  Check(ArrayFromJSON(int8(), "[0]"), true, true, true, true);
+
+  // Both monotonic increasing and decreasing when all values are the same.
+  Check(ArrayFromJSON(int8(), "[0, 0, 0, 0]"), true, false, true, false);
+
+  Check(ArrayFromJSON(int8(), "[null, 0, 0, 0]"), true, false, true, false);
+  Check(ArrayFromJSON(int8(), "[null, 0, 0, 0]"), true, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[null, 0, 0, 0]"), false, false, true, false, max);
+
+  Check(ArrayFromJSON(int8(), "[0, 0, 0, null]"), true, false, true, false);
+  Check(ArrayFromJSON(int8(), "[0, 0, 0, null]"), false, false, true, false, min);
+  Check(ArrayFromJSON(int8(), "[0, 0, 0, null]"), true, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[0, null, 0, 0]"), true, false, true, false);
+  Check(ArrayFromJSON(int8(), "[0, null, 0, 0]"), false, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[0, null, 0, 0]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[null, null, null]"), true, true, true, true);
+  Check(ArrayFromJSON(int8(), "[null, null, null]"), true, false, true, false, min);
+  Check(ArrayFromJSON(int8(), "[null, null, null]"), true, false, true, false, max);
+
+  // Monotonic (strictly) increasing
+  Check(ArrayFromJSON(int8(), "[1, 1, 3, 4]"), true, false, false, false);
+
+  Check(ArrayFromJSON(int8(), "[null, 1, 1, 4]"), true, false, false, false);
+  Check(ArrayFromJSON(int8(), "[null, 1, 1, 4]"), true, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[null, 1, 1, 4]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[1, 1, null, 4]"), true, false, false, false);
+  Check(ArrayFromJSON(int8(), "[1, 1, null, 4]"), false, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[1, 1, null, 4]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[1, 1, 3, null]"), true, false, false, false);
+  Check(ArrayFromJSON(int8(), "[1, 1, 3, null]"), false, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[1, 1, 3, null]"), true, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[-1, 2, 3, 4]"), true, true, false, false);
+  Check(ArrayFromJSON(int8(), "[-1, 2, 3, 4, 4]"), true, false, false, false);
+  Check(ArrayFromJSON(int8(), "[-1, 2, 3, 4, 5]"), true, true, false, false);
+
+  Check(ArrayFromJSON(int8(), "[null, 2, 3, 4]"), true, true, false, false);
+  Check(ArrayFromJSON(int8(), "[null, 2, 3, 4]"), true, true, false, false, min);
+  Check(ArrayFromJSON(int8(), "[null, 2, 3, 4]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[null, null, 3, 4]"), true, true, false, false);
+  Check(ArrayFromJSON(int8(), "[null, null, 3, 4]"), true, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[null, null, 3, 4]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[1, null, 3, 4]"), true, true, false, false);
+  Check(ArrayFromJSON(int8(), "[1, null, 3, 4]"), false, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[1, null, 3, 4]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[1, 2, 3, null]"), true, true, false, false);
+  Check(ArrayFromJSON(int8(), "[1, 2, 3, null]"), false, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[1, 2, 3, null]"), true, true, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[1, 2, 1, 2]"), false, false, false, false);
+
+  // Monotonic (strictly) decreasing
+  Check(ArrayFromJSON(int8(), "[4, 4, 2, 1]"), false, false, true, false);
+
+  Check(ArrayFromJSON(int8(), "[4, 4, 2, null]"), false, false, true, false);
+  Check(ArrayFromJSON(int8(), "[4, 4, 2, null]"), false, false, true, false, min);
+  Check(ArrayFromJSON(int8(), "[4, 4, 2, null]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[4, 4, null, 1]"), false, false, true, false);
+  Check(ArrayFromJSON(int8(), "[4, 4, null, 1]"), false, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[4, 4, null, 1]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[null, 4, 2, 1]"), false, false, true, true);
+  Check(ArrayFromJSON(int8(), "[null, 4, 2, 1]"), false, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[null, 4, 2, 1]"), false, false, true, true, max);
+
+  Check(ArrayFromJSON(int8(), "[4, 3, 2, 1]"), false, false, true, true);
+  Check(ArrayFromJSON(int8(), "[5, 4, 3, 2, 1]"), false, false, true, true);
+  Check(ArrayFromJSON(int8(), "[5, 4, 3, 2, 2]"), false, false, true, false);
+
+  Check(ArrayFromJSON(int8(), "[4, 3, 2, null]"), false, false, true, true);
+  Check(ArrayFromJSON(int8(), "[4, 3, 2, null]"), false, false, true, true, min);
+  Check(ArrayFromJSON(int8(), "[4, 3, 2, null]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[4, 3, null, null]"), false, false, true, true);
+  Check(ArrayFromJSON(int8(), "[4, 3, null, null]"), false, false, true, false, min);
+  Check(ArrayFromJSON(int8(), "[4, 3, null, null]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[4, null, 2, 1]"), false, false, true, true);
+  Check(ArrayFromJSON(int8(), "[4, null, 2, 1]"), false, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[4, null, 2, 1]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(int8(), "[null, 3, 2, 1]"), false, false, true, true);
+  Check(ArrayFromJSON(int8(), "[null, 3, 2, 1]"), false, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[null, 3, 2, 1]"), false, false, true, true, max);
+
+  Check(ArrayFromJSON(int8(), "[null, null, 2, 1]"), false, false, true, true);
+  Check(ArrayFromJSON(int8(), "[null, null, 2, 1]"), false, false, false, false, min);
+  Check(ArrayFromJSON(int8(), "[null, null, 2, 1]"), false, false, true, false, max);
+
+  Check(ArrayFromJSON(int8(), "[4, 3, 4, 3]"), false, false, false, false);
+
+  // Regression: once a null has been skipped (nulls ignored), every remaining pair
+  // of non-null neighbours must still be compared.
+  Check(ArrayFromJSON(int8(), "[1, null, 3, 2]"), false, false, false, false);
+  Check(ArrayFromJSON(int8(), "[3, null, 1, 2]"), false, false, false, false);
+  Check(ArrayFromJSON(int8(), "[1, null, 3, 3]"), true, false, false, false);
+  Check(ArrayFromJSON(int8(), "[1, null, null, 2, 1]"), false, false, false, false);
+
+  // Other types
+  // Boolean
+  Check(ArrayFromJSON(boolean(), "[true, true, false]"), false, false, true, false);
+  Check(ArrayFromJSON(boolean(), "[true, false]"), false, false, true, true);
+
+  // Floating point
+  const IsMonotonicOptions approx(IsMonotonicOptions::NullHandling::IGNORE_NULLS, true,
+                                  1e-1);
+
+  Check(ArrayFromJSON(float32(), "[NaN]"), false, false, false, false);
+  Check(ArrayFromJSON(float32(), "[NaN, NaN]"), false, false, false, false);
+  Check(ArrayFromJSON(float32(), "[NaN, NaN, NaN]"), false, false, false, false);
+  Check(ArrayFromJSON(float32(), "[NaN, 1, 2, 3]"), false, false, false, false);
+
+  Check(ArrayFromJSON(float32(), "[-Inf, 0, Inf]"), true, true, false, false);
+  Check(ArrayFromJSON(float32(), "[-Inf, -Inf, Inf]"), true, false, false, false);
+  Check(ArrayFromJSON(float32(), "[Inf, 0, -Inf]"), false, false, true, true);
+  Check(ArrayFromJSON(float32(), "[Inf, Inf, -Inf]"), false, false, true, false);
+
+  Check(ArrayFromJSON(float64(), "[-Inf, Inf, null]"), false, false, false, false, min);
+  Check(ArrayFromJSON(float64(), "[-Inf, Inf, null]"), true, false, false, false, max);
+  Check(ArrayFromJSON(float64(), "[Inf, -Inf, null]"), false, false, true, false, min);
+  Check(ArrayFromJSON(float64(), "[Inf, -Inf, null]"), false, false, false, false, max);
+
+  Check(ArrayFromJSON(float32(), "[-Inf, null, Inf]"), true, false, false, false, min);
+  Check(ArrayFromJSON(float32(), "[-Inf, null, Inf]"), true, false, false, false, max);
+  Check(ArrayFromJSON(float32(), "[Inf, null, -Inf]"), false, false, true, false, min);
+  Check(ArrayFromJSON(float32(), "[Inf, null, -Inf]"), false, false, true, false, max);
+
+  Check(ArrayFromJSON(float32(), "[-Inf, 0, null, Inf]"), false, false, false, false,
+        min);
+  Check(ArrayFromJSON(float32(), "[-Inf, 0, null, Inf]"), true, false, false, false, max);
+  Check(ArrayFromJSON(float32(), "[Inf, 0, null, -Inf]"), false, false, true, false, min);
+  Check(ArrayFromJSON(float32(), "[Inf, 0, null, -Inf]"), false, false, false, false,
+        max);
+
+  Check(ArrayFromJSON(float32(), "[1, 1.01, 1.02, 1.03, 1.04]"), true, true, false,
+        false);
+  Check(ArrayFromJSON(float32(), "[1, 1.01, 1.02, 1.03, 1.04]"), true, false, true, false,
+        approx);
+  Check(ArrayFromJSON(float32(), "[1, 1.01, 1.02, 1.03, 2]"), true, true, false, false);
+  Check(ArrayFromJSON(float32(), "[1, 1.01, 1.02, 1.03, 2]"), true, false, false, false,
+        approx);
+
+  Check(ArrayFromJSON(float32(), "[1, 2, 3, 4]"), true, true, false, false);
+  Check(ArrayFromJSON(float64(), "[4, 3, 2, 1]"), false, false, true, true);
+
+  // Temporal
+  Check(ArrayFromJSON(time32(TimeUnit::SECOND), "[1, 2, 3, 4, 5]"), true, true, false,
+        false);
+  Check(ArrayFromJSON(time64(TimeUnit::NANO), "[5, 4, 4, 2, 1]"), false, false, true,
+        false);
+  Check(ArrayFromJSON(timestamp(TimeUnit::SECOND),
+                      R"(["1970-01-01","2000-02-29","1900-02-28"])"),
+        false, false, false, false);
+  Check(ArrayFromJSON(timestamp(TimeUnit::MILLI, "UTC"),
+                      R"(["1970-01-01","1971-01-01","1972-01-01"])"),
+        true, true, false, false);
+  Check(ArrayFromJSON(date32(), "[1, 2, 3, 4, null, 5]"), true, true, false, false);
+  Check(ArrayFromJSON(date64(), "[1, 2, 3, 4, null, 5]"), false, false, false, false,
+        max);
+  Check(ArrayFromJSON(month_interval(), "[1, 2, 3, 4, null, 5]"), true, true, false,
+        false);
+  Check(ArrayFromJSON(month_interval(), "[1, 2, 3, 4, null]"), true, true, false, false,
+        max);
+  Check(ArrayFromJSON(duration(TimeUnit::SECOND), "[1, 2, 3, 4, 5]"), true, true, false,
+        false);
+  Check(ArrayFromJSON(duration(TimeUnit::NANO), "[5, 4, 4, 2, 1]"), false, false, true,
+        false);
+}
+
+}  // namespace compute
+}  // namespace arrow
diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc
index c6455874dbc..b2e31520aff 100644
--- a/cpp/src/arrow/compute/registry.cc
+++ b/cpp/src/arrow/compute/registry.cc
@@ -173,6 +173,7 @@
static std::unique_ptr CreateBuiltInRegistry() {
   // Vector functions
   RegisterVectorArraySort(registry.get());
   RegisterVectorHash(registry.get());
+  RegisterVectorIsMonotonic(registry.get());
   RegisterVectorNested(registry.get());
   RegisterVectorReplace(registry.get());
   RegisterVectorSelection(registry.get());
diff --git a/cpp/src/arrow/compute/registry_internal.h b/cpp/src/arrow/compute/registry_internal.h
index 98f61185f7b..5d5a2434a62 100644
--- a/cpp/src/arrow/compute/registry_internal.h
+++ b/cpp/src/arrow/compute/registry_internal.h
@@ -42,6 +42,7 @@ void RegisterScalarOptions(FunctionRegistry* registry);
 // Vector functions
 void RegisterVectorArraySort(FunctionRegistry* registry);
 void RegisterVectorHash(FunctionRegistry* registry);
+void RegisterVectorIsMonotonic(FunctionRegistry* registry);
 void RegisterVectorNested(FunctionRegistry* registry);
 void RegisterVectorReplace(FunctionRegistry* registry);
 void RegisterVectorSelection(FunctionRegistry* registry);
diff --git a/cpp/src/arrow/type_traits.h b/cpp/src/arrow/type_traits.h
index 4b4cb5d15d3..6852fd54d47 100644
--- a/cpp/src/arrow/type_traits.h
+++ b/cpp/src/arrow/type_traits.h
@@ -542,6 +542,9 @@ using is_floating_type = std::is_base_of;
 template
 using enable_if_floating_point = enable_if_t::value, R>;
 
+template  // NOTE(review): SFINAE counterpart of enable_if_floating_point - confirm
+using enable_if_not_floating_point = enable_if_t::value, R>;
+
 // Half floats are special in that they behave physically like an unsigned
 // integer.
 template