Skip to content
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ if(ARROW_COMPUTE)
compute/kernels/util_internal.cc
compute/kernels/vector_array_sort.cc
compute/kernels/vector_hash.cc
compute/kernels/vector_is_monotonic.cc
compute/kernels/vector_nested.cc
compute/kernels/vector_replace.cc
compute/kernels/vector_selection.cc
Expand Down
39 changes: 39 additions & 0 deletions cpp/src/arrow/compute/api_vector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ namespace internal {

using compute::DictionaryEncodeOptions;
using compute::FilterOptions;
using compute::IsMonotonicOptions;
using compute::NullPlacement;

template <>
Expand Down Expand Up @@ -88,6 +89,25 @@ struct EnumTraits<NullPlacement>
return "<INVALID>";
}
};
// Enum traits for IsMonotonicOptions::NullHandling: lists every enumerator
// (IGNORE_NULLS, USE_MIN_VALUE, USE_MAX_VALUE) and maps each one to its
// printable name.
template <>
struct EnumTraits<IsMonotonicOptions::NullHandling>
    : BasicEnumTraits<IsMonotonicOptions::NullHandling,
                      IsMonotonicOptions::NullHandling::IGNORE_NULLS,
                      IsMonotonicOptions::NullHandling::USE_MIN_VALUE,
                      IsMonotonicOptions::NullHandling::USE_MAX_VALUE> {
  // Name of the enum type itself.
  static std::string name() { return "IsMonotonicOptions::NullHandling"; }

  // Name of a single enumerator; "<INVALID>" for any value that is not one of
  // the declared enumerators.
  static std::string value_name(IsMonotonicOptions::NullHandling value) {
    using NullHandling = IsMonotonicOptions::NullHandling;
    const char* label = "<INVALID>";
    // Deliberately no default case, so the compiler can still warn when an
    // enumerator is added without a matching label.
    switch (value) {
      case NullHandling::IGNORE_NULLS:
        label = "IGNORE_NULLS";
        break;
      case NullHandling::USE_MIN_VALUE:
        label = "USE_MIN_VALUE";
        break;
      case NullHandling::USE_MAX_VALUE:
        label = "USE_MAX_VALUE";
        break;
    }
    return label;
  }
};

} // namespace internal

Expand Down Expand Up @@ -135,6 +155,11 @@ static auto kPartitionNthOptionsType = GetFunctionOptionsType<PartitionNthOption
static auto kSelectKOptionsType = GetFunctionOptionsType<SelectKOptions>(
DataMember("k", &SelectKOptions::k),
DataMember("sort_keys", &SelectKOptions::sort_keys));
// Options type object for IsMonotonicOptions, obtained from
// GetFunctionOptionsType with one DataMember entry per field of the options
// class (null_handling, floating_approximate, epsilon). The string given to
// each DataMember is the name of the corresponding field.
static auto kIsMonotonicOptionsType = GetFunctionOptionsType<IsMonotonicOptions>(
DataMember("null_handling", &IsMonotonicOptions::null_handling),
DataMember("floating_approximate", &IsMonotonicOptions::floating_approximate),
DataMember("epsilon", &IsMonotonicOptions::epsilon));

} // namespace
} // namespace internal

Expand Down Expand Up @@ -176,6 +201,14 @@ SelectKOptions::SelectKOptions(int64_t k, std::vector<SortKey> sort_keys)
sort_keys(std::move(sort_keys)) {}
constexpr char SelectKOptions::kTypeName[];

// Constructs the options: passes internal::kIsMonotonicOptionsType to the
// FunctionOptions base and copies each argument into the data member of the
// same name.
IsMonotonicOptions::IsMonotonicOptions(IsMonotonicOptions::NullHandling null_handling,
bool floating_approximate, double epsilon)
: FunctionOptions(internal::kIsMonotonicOptionsType),
null_handling(null_handling),
floating_approximate(floating_approximate),
epsilon(epsilon) {}
// Out-of-class definition of the static constexpr kTypeName member (required
// before C++17, where such members are not implicitly inline).
constexpr char IsMonotonicOptions::kTypeName[];

namespace internal {
void RegisterVectorOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType));
Expand All @@ -185,6 +218,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) {
DCHECK_OK(registry->AddFunctionOptionsType(kSortOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kPartitionNthOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kSelectKOptionsType));
DCHECK_OK(registry->AddFunctionOptionsType(kIsMonotonicOptionsType));
}
} // namespace internal

Expand Down Expand Up @@ -280,6 +314,11 @@ Result<std::shared_ptr<StructArray>> ValueCounts(const Datum& value, ExecContext
return checked_pointer_cast<StructArray>(result.make_array());
}

// Invokes the "is_monotonic" compute function on `value`, forwarding
// `options` and the execution context, and returns its result unchanged.
Result<Datum> IsMonotonic(const Datum& value, const IsMonotonicOptions& options,
                          ExecContext* ctx) {
  // The function takes exactly one argument: the input datum.
  const std::vector<Datum> args = {value};
  return CallFunction("is_monotonic", args, &options, ctx);
}

// ----------------------------------------------------------------------
// Filter- and take-related selection functions

Expand Down
51 changes: 51 additions & 0 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,35 @@ class ARROW_EXPORT PartitionNthOptions : public FunctionOptions {
NullPlacement null_placement;
};

/// \brief Options for IsMonotonic
class ARROW_EXPORT IsMonotonicOptions : public FunctionOptions {
public:
enum NullHandling {
/// Ignore nulls.
IGNORE_NULLS,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Based on the other enum names, use only IGNORE, since enum NullHandling already specifies this is for nulls.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had that initially, but IGNORE caused compilation issues on Windows.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, never mind.

/// Use min value of element type as the value of nulls.
/// -Inf for floating point numbers.
USE_MIN_VALUE,
/// Use max value of element type as the value of nulls.
/// Inf for floating point numbers.
USE_MAX_VALUE
};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ordering of nulls and NaNs was also discussed for the sorting function. I would expect IsMonotonic and the sorting function to be consistent. That is, if a sort operation is performed first, then the corresponding IsMonotonic check should result in true.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I agree that a sort before invoking this kernel should result in true for the corresponding check. However, I feel the null handling variants are a bit confusing: AtStart defines NaN > null and AtEnd defines NaN < null. Also, the sorting kernel can ignore equality, but this kernel has to consider it in order to check whether values are unique (strictly increasing/decreasing).

I think if we want to allow users to define order of unordered values (both for sorting and this kernel) we need something like this:

bool compare_nulls = false; // default: any null results in false outputs (or error in case of sort)
bool compare_nans = false; // default: any nan results in false outputs (or error in case of sort)

// these are not needed when sorting
bool nulls_equal = false; // when nulls are compared, are they considered equal?
bool nans_equal = false; // when nans are compared, are they considered equal?

// when both nulls and nans are compared
enum Ordering { Less, Equal, Greater }
Ordering nan_compared_with_null; // when comparing nulls and nans, what ordering should be used?


/// \brief Construct options for IsMonotonic.
///
/// \param[in] null_handling how nulls are handled; IGNORE_NULLS by default.
/// \param[in] floating_approximate whether to compare floating point numbers
/// approximately; false by default.
/// \param[in] epsilon error bound used for approximate floating point
/// comparisons; kDefaultAbsoluteTolerance by default.
explicit IsMonotonicOptions(NullHandling null_handling = IGNORE_NULLS,
bool floating_approximate = false,
double epsilon = kDefaultAbsoluteTolerance);
/// Name of this options type.
constexpr static char const kTypeName[] = "IsMonotonicOptions";
/// \brief Return a default-constructed IsMonotonicOptions.
static IsMonotonicOptions Defaults() { return IsMonotonicOptions(); }

/// Define how nulls are handled.
NullHandling null_handling = IGNORE_NULLS;
/// Whether or not to use approximate floating point number comparisons.
bool floating_approximate = false;
/// Epsilon (error bound) value used when approximately comparing floating point
/// numbers.
double epsilon = kDefaultAbsoluteTolerance;
};

/// @}

/// \brief Filter with a boolean selection filter
Expand Down Expand Up @@ -494,6 +523,28 @@ Result<Datum> DictionaryEncode(
const DictionaryEncodeOptions& options = DictionaryEncodeOptions::Defaults(),
ExecContext* ctx = NULLPTR);

/// \brief Returns information about the monotonicity of the elements in an
/// array with well-ordered elements.
///
/// Returns a struct scalar with type
/// struct<
/// increasing: boolean,
/// strictly_increasing: boolean,
/// decreasing: boolean,
/// strictly_decreasing: boolean
/// >
///
/// \param[in] data input data.
/// \param[in] options see IsMonotonicOptions for more information; defaults to
/// IsMonotonicOptions::Defaults().
/// \param[in] ctx the function execution context, optional (defaults to NULLPTR).
/// \return resulting datum as a struct scalar.
///
/// \since x.0.0 \note API not yet finalized
// TODO(review): "x.0.0" above is a placeholder; replace it with the release
// version in which this function first ships.
ARROW_EXPORT
Result<Datum> IsMonotonic(
const Datum& data, const IsMonotonicOptions& options = IsMonotonicOptions::Defaults(),
ExecContext* ctx = NULLPTR);

// ----------------------------------------------------------------------
// Deprecated functions

Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/kernels/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute")
add_arrow_compute_test(vector_test
SOURCES
vector_hash_test.cc
vector_is_monotonic_test.cc
vector_nested_test.cc
vector_replace_test.cc
vector_selection_test.cc
Expand Down
Loading