Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions c_glib/arrow-glib/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ garrow_count_options_set_property(GObject *object,
switch (prop_id) {
case PROP_MODE:
priv->options.count_mode =
static_cast<arrow::compute::CountOptions::mode>(g_value_get_enum(value));
static_cast<arrow::compute::CountOptions::Mode>(g_value_get_enum(value));
break;
default:
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec);
Expand Down Expand Up @@ -706,7 +706,8 @@ static void
garrow_count_options_init(GArrowCountOptions *object)
{
auto priv = GARROW_COUNT_OPTIONS_GET_PRIVATE(object);
new(&priv->options) arrow::compute::CountOptions(arrow::compute::CountOptions::COUNT_ALL);
new(&priv->options) arrow::compute::CountOptions(
arrow::compute::CountOptions::COUNT_NON_NULL);
}

static void
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/arrow/array/validate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ struct ValidateArrayVisitor {
if (value_size < 0) {
return Status::Invalid("FixedSizeListArray has negative value size ", value_size);
}
if (HasMultiplyOverflow(len, value_size) ||
if (HasPositiveMultiplyOverflow(len, value_size) ||
array.values()->length() != len * value_size) {
return Status::Invalid("Values Length (", array.values()->length(),
") is not equal to the length (", len,
Expand Down Expand Up @@ -329,7 +329,7 @@ Status ValidateArray(const Array& array) {
type.ToString(), ", got ", data.buffers.size());
}
// This check is required to avoid addition overflow below
if (HasAdditionOverflow(array.length(), array.offset())) {
if (HasPositiveAdditionOverflow(array.length(), array.offset())) {
return Status::Invalid("Array of type ", type.ToString(),
" has impossibly large length and offset");
}
Expand All @@ -346,7 +346,8 @@ Status ValidateArray(const Array& array) {
min_buffer_size = BitUtil::BytesForBits(array.length() + array.offset());
break;
case DataTypeLayout::FIXED_WIDTH:
if (HasMultiplyOverflow(array.length() + array.offset(), spec.byte_width)) {
if (HasPositiveMultiplyOverflow(array.length() + array.offset(),
spec.byte_width)) {
return Status::Invalid("Array of type ", type.ToString(),
" has impossibly large length and offset");
}
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/arrow/compute/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@

#pragma once

/// \defgroup compute-concrete-options Concrete option classes for compute functions
/// @{
/// @}

#include "arrow/compute/api_aggregate.h" // IWYU pragma: export
#include "arrow/compute/api_scalar.h" // IWYU pragma: export
#include "arrow/compute/api_vector.h" // IWYU pragma: export
Expand Down
61 changes: 32 additions & 29 deletions cpp/src/arrow/compute/api_aggregate.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,47 @@ class ExecContext;
// ----------------------------------------------------------------------
// Aggregate functions

/// \class CountOptions
/// \addtogroup compute-concrete-options
/// @{

/// \brief Control Count kernel behavior
///
/// The user controls the Count kernel behavior with this class. By default,
/// it will count all non-null values.
/// By default, all non-null values are counted.
struct ARROW_EXPORT CountOptions : public FunctionOptions {
enum mode {
// Count all non-null values.
COUNT_ALL = 0,
// Count all null values.
enum Mode {
/// Count all non-null values.
COUNT_NON_NULL = 0,
/// Count all null values.
COUNT_NULL,
};

explicit CountOptions(enum mode count_mode) : count_mode(count_mode) {}
explicit CountOptions(enum Mode count_mode) : count_mode(count_mode) {}

static CountOptions Defaults() { return CountOptions(COUNT_NON_NULL); }

enum Mode count_mode = COUNT_NON_NULL;
};

/// \brief Control MinMax kernel behavior
///
/// By default, null values are ignored
struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
enum Mode {
/// Skip null values
SKIP = 0,
/// Any nulls will result in null output
OUTPUT_NULL
};

explicit MinMaxOptions(enum Mode null_handling = SKIP) : null_handling(null_handling) {}

static CountOptions Defaults() { return CountOptions(COUNT_ALL); }
static MinMaxOptions Defaults() { return MinMaxOptions{}; }

enum mode count_mode = COUNT_ALL;
enum Mode null_handling = SKIP;
};

/// @}

/// \brief Count non-null (or null) values in an array.
///
/// \param[in] options counting options, see CountOptions for more information
Expand Down Expand Up @@ -91,25 +113,6 @@ Result<Datum> Mean(const Datum& value, ExecContext* ctx = NULLPTR);
ARROW_EXPORT
Result<Datum> Sum(const Datum& value, ExecContext* ctx = NULLPTR);

/// \class MinMaxOptions
///
/// The user can control the MinMax kernel behavior with this class. By default,
/// it will skip null if there is a null value present.
struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
enum mode {
/// skip null values
SKIP = 0,
/// any nulls will result in null output
OUTPUT_NULL
};

explicit MinMaxOptions(enum mode null_handling = SKIP) : null_handling(null_handling) {}

static MinMaxOptions Defaults() { return MinMaxOptions{}; }

enum mode null_handling = SKIP;
};

/// \brief Calculate the min / max of a numeric array
///
/// This function returns both the min and max as a struct scalar, with type
Expand Down
97 changes: 52 additions & 45 deletions cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,64 @@
namespace arrow {
namespace compute {

// ----------------------------------------------------------------------
/// \addtogroup compute-concrete-options
///
/// @{

/// \brief Options accepted by arithmetic functions such as Multiply.
struct ArithmeticOptions : FunctionOptions {
  /// Construct options with `check_overflow` switched off.
  ArithmeticOptions()
      : check_overflow{false} {}

  /// False after construction.
  /// NOTE(review): presumably asks the kernel to detect overflow -- confirm
  /// against the kernel implementations.
  bool check_overflow;
};

struct ARROW_EXPORT BinaryContainsExactOptions : public FunctionOptions {
explicit BinaryContainsExactOptions(std::string pattern)
: pattern(std::move(pattern)) {}

/// The exact pattern to look for inside input values.
std::string pattern;
};

/// Options for IsIn and Match functions
struct ARROW_EXPORT SetLookupOptions : public FunctionOptions {
explicit SetLookupOptions(Datum value_set, bool skip_nulls)
: value_set(std::move(value_set)), skip_nulls(skip_nulls) {}

/// The set of values to look up input values into.
Datum value_set;
/// Whether nulls in `value_set` count for lookup.
///
/// If true, any null in `value_set` is ignored and nulls in the input
/// produce null (Match) or false (IsIn) values in the output.
/// If false, any null in `value_set` is successfully matched in
/// the input.
bool skip_nulls;
};

/// \brief Options holding a format string and a time unit.
///
/// NOTE(review): presumably `format` is a strptime-style format and `unit` the
/// resolution of the produced timestamps -- confirm against the kernel.
struct ARROW_EXPORT StrptimeOptions : public FunctionOptions {
  /// \param[in] format the format string; taken by value and moved into the
  /// member so that no second copy is made
  /// \param[in] unit the time unit to store
  explicit StrptimeOptions(std::string format, TimeUnit::type unit)
      : format(std::move(format)), unit(unit) {}

  /// The stored format string.
  std::string format;
  /// The stored time unit.
  TimeUnit::type unit;
};

/// \brief Comparison operators selectable through CompareOptions.
///
/// Stored in a single byte; the numeric values follow declaration order.
enum CompareOperator : int8_t {
  EQUAL = 0,
  NOT_EQUAL = 1,
  GREATER = 2,
  GREATER_EQUAL = 3,
  LESS = 4,
  LESS_EQUAL = 5,
};

struct CompareOptions : public FunctionOptions {
explicit CompareOptions(CompareOperator op) : op(op) {}

enum CompareOperator op;
};

/// @}

/// \brief Add two values together. Array values must be the same length. If
/// either addend is null the result will be null.
///
Expand Down Expand Up @@ -79,21 +130,6 @@ Result<Datum> Multiply(const Datum& left, const Datum& right,
ArithmeticOptions options = ArithmeticOptions(),
ExecContext* ctx = NULLPTR);

/// \brief Comparison operators selectable through CompareOptions.
///
/// The numeric values follow declaration order.
enum CompareOperator {
  EQUAL = 0,
  NOT_EQUAL = 1,
  GREATER = 2,
  GREATER_EQUAL = 3,
  LESS = 4,
  LESS_EQUAL = 5,
};

struct CompareOptions : public FunctionOptions {
explicit CompareOptions(CompareOperator op) : op(op) {}

enum CompareOperator op;
};

/// \brief Compare a numeric array with a scalar.
///
/// \param[in] left datum to compare, must be an Array
Expand Down Expand Up @@ -185,15 +221,6 @@ Result<Datum> KleeneOr(const Datum& left, const Datum& right, ExecContext* ctx =
ARROW_EXPORT
Result<Datum> Xor(const Datum& left, const Datum& right, ExecContext* ctx = NULLPTR);

/// For set lookup operations like IsIn, Match
struct ARROW_EXPORT SetLookupOptions : public FunctionOptions {
explicit SetLookupOptions(Datum value_set, bool skip_nulls)
: value_set(std::move(value_set)), skip_nulls(skip_nulls) {}

Datum value_set;
bool skip_nulls;
};

/// \brief IsIn returns true for each element of `values` that is contained in
/// `value_set`
///
Expand Down Expand Up @@ -274,25 +301,5 @@ ARROW_EXPORT
Result<Datum> FillNull(const Datum& values, const Datum& fill_value,
ExecContext* ctx = NULLPTR);

// ----------------------------------------------------------------------
// String functions

/// \brief Options carrying the pattern used by the exact-match string function.
struct ARROW_EXPORT BinaryContainsExactOptions : public FunctionOptions {
  /// \param[in] pattern the pattern to store; taken by value and moved into
  /// the member so that no second copy is made
  explicit BinaryContainsExactOptions(std::string pattern)
      : pattern(std::move(pattern)) {}

  /// The stored pattern.
  std::string pattern;
};

// ----------------------------------------------------------------------
// Temporal functions

/// \brief Options holding a format string and a time unit.
///
/// NOTE(review): presumably `format` is a strptime-style format and `unit` the
/// resolution of the produced timestamps -- confirm against the kernel.
struct ARROW_EXPORT StrptimeOptions : public FunctionOptions {
  /// \param[in] format the format string; taken by value and moved into the
  /// member so that no second copy is made
  /// \param[in] unit the time unit to store
  explicit StrptimeOptions(std::string format, TimeUnit::type unit)
      : format(std::move(format)), unit(unit) {}

  /// The stored format string.
  std::string format;
  /// The stored time unit.
  TimeUnit::type unit;
};

} // namespace compute
} // namespace arrow
37 changes: 23 additions & 14 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ namespace compute {

class ExecContext;

/// \addtogroup compute-concrete-options
/// @{

struct FilterOptions : public FunctionOptions {
/// Configure the action taken when a slot of the selection mask is null
enum NullSelectionBehavior {
Expand All @@ -46,6 +49,25 @@ struct FilterOptions : public FunctionOptions {
NullSelectionBehavior null_selection_behavior = DROP;
};

struct ARROW_EXPORT TakeOptions : public FunctionOptions {
explicit TakeOptions(bool boundscheck = true) : boundscheck(boundscheck) {}

bool boundscheck = true;
static TakeOptions BoundsCheck() { return TakeOptions(true); }
static TakeOptions NoBoundsCheck() { return TakeOptions(false); }
static TakeOptions Defaults() { return BoundsCheck(); }
};

/// \brief Partitioning options for NthToIndices
struct PartitionOptions : FunctionOptions {
  /// \param[in] pivot the pivot index to store
  explicit PartitionOptions(int64_t pivot)
      : pivot{pivot} {}

  /// The index into the equivalent sorted array of the partition pivot element.
  int64_t pivot;
};

/// @}

/// \brief Filter with a boolean selection filter
///
/// The output will be populated with values from the input at positions
Expand Down Expand Up @@ -85,15 +107,6 @@ Result<std::shared_ptr<ArrayData>> GetTakeIndices(

} // namespace internal

struct ARROW_EXPORT TakeOptions : public FunctionOptions {
explicit TakeOptions(bool boundscheck = true) : boundscheck(boundscheck) {}

bool boundscheck = true;
static TakeOptions BoundsCheck() { return TakeOptions(true); }
static TakeOptions NoBoundsCheck() { return TakeOptions(false); }
static TakeOptions Defaults() { return BoundsCheck(); }
};

/// \brief Take from an array of values at indices in another array
///
/// The output array will be of the same type as the input values
Expand Down Expand Up @@ -121,11 +134,6 @@ Result<std::shared_ptr<Array>> Take(const Array& values, const Array& indices,
const TakeOptions& options = TakeOptions::Defaults(),
ExecContext* ctx = NULLPTR);

/// \brief Options carrying the pivot for the partition function.
struct PartitionOptions : FunctionOptions {
  /// \param[in] pivot the pivot value to store
  explicit PartitionOptions(int64_t pivot)
      : pivot{pivot} {}

  /// The stored pivot.
  /// NOTE(review): presumably the n-th position in sorted order -- confirm.
  int64_t pivot;
};

/// \brief Returns indices that partition an array around n-th
/// sorted element.
///
Expand Down Expand Up @@ -178,6 +186,7 @@ ARROW_EXPORT extern const char kValuesFieldName[];
ARROW_EXPORT extern const char kCountsFieldName[];
ARROW_EXPORT extern const int32_t kValuesFieldIndex;
ARROW_EXPORT extern const int32_t kCountsFieldIndex;

/// \brief Return counts of unique elements from an array-like object.
///
/// Note that the counts do not include counts for nulls in the array. These can be
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/cast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ Result<const ScalarKernel*> CastFunction::DispatchExact(

// Validate arity
if (passed_num_args != 1) {
return Status::Invalid("Cast sunctions accept 1 argument but passed ",
return Status::Invalid("Cast functions accept 1 argument but passed ",
passed_num_args);
}
std::vector<const ScalarKernel*> candidate_kernels;
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/compute/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ namespace compute {

class ExecContext;

/// \addtogroup compute-concrete-options
/// @{

struct ARROW_EXPORT CastOptions : public FunctionOptions {
CastOptions()
: allow_int_overflow(false),
Expand Down Expand Up @@ -73,6 +76,8 @@ struct ARROW_EXPORT CastOptions : public FunctionOptions {
bool allow_invalid_utf8;
};

/// @}

// Cast functions are _not_ registered in the FunctionRegistry, though they use
// the same execution machinery
class CastFunction : public ScalarFunction {
Expand Down
Loading