Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/api_aggregate.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ struct ARROW_EXPORT MinMaxOptions : public FunctionOptions {
/// Skip null values
SKIP = 0,
/// Any nulls will result in null output
OUTPUT_NULL
EMIT_NULL
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we simply change this? (is it public on the C++ side?) And it is changed for consistency with how it's called elsewhere?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The next version will be 2.0.0, meaning it's ok to break APIs. Of course, we should try to minimize breakage, but this one aims to fix an inconsistency.

};

explicit MinMaxOptions(enum Mode null_handling = SKIP) : null_handling(null_handling) {}
Expand Down
3 changes: 0 additions & 3 deletions cpp/src/arrow/compute/exec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -952,9 +952,6 @@ Result<Datum> CallFunction(const std::string& func_name, const std::vector<Datum
}
ARROW_ASSIGN_OR_RAISE(std::shared_ptr<const Function> func,
ctx->func_registry()->GetFunction(func_name));
if (options == nullptr) {
options = func->default_options();
}
return func->Execute(args, options, ctx);
}

Expand Down
6 changes: 6 additions & 0 deletions cpp/src/arrow/compute/function.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ Result<const KernelType*> DispatchExactImpl(const Function& func,

Result<Datum> Function::Execute(const std::vector<Datum>& args,
const FunctionOptions* options, ExecContext* ctx) const {
if (options == nullptr) {
options = default_options();
}
if (ctx == nullptr) {
ExecContext default_ctx;
return Execute(args, options, &default_ctx);
Expand Down Expand Up @@ -189,6 +192,9 @@ Result<Datum> MetaFunction::Execute(const std::vector<Datum>& args,
const FunctionOptions* options,
ExecContext* ctx) const {
RETURN_NOT_OK(CheckArity(static_cast<int>(args.size())));
if (options == nullptr) {
options = default_options();
}
return ExecuteImpl(args, options, ctx);
}

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/compute/function.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ struct ARROW_EXPORT Arity {
/// invoking the function
static Arity VarArgs(int min_args = 0) { return Arity(min_args, true); }

explicit Arity(int num_args, bool is_varargs = false)
// NOTE: the 0-argument form (default constructor) is required for Cython
explicit Arity(int num_args = 0, bool is_varargs = false)
: num_args(num_args), is_varargs(is_varargs) {}

/// The number of required arguments (or the minimum number for varargs
Expand Down Expand Up @@ -124,8 +125,7 @@ class ARROW_EXPORT Function {
/// kernel dispatch, batch iteration, and memory allocation details taken
/// care of.
///
/// Function implementations may assume that options is non-null and valid
/// or to forgo options and accept only nullptr for that argument.
/// If the `options` pointer is null, then `default_options()` will be used.
///
/// This function can be overridden in subclasses.
virtual Result<Datum> Execute(const std::vector<Datum>& args,
Expand Down
5 changes: 5 additions & 0 deletions cpp/src/arrow/compute/kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ std::string InputType::ToString() const {
}
ss << "[";
switch (kind_) {
case InputType::ANY_TYPE:
ss << "any";
break;
case InputType::EXACT_TYPE:
ss << type_->ToString();
break;
Expand All @@ -303,6 +306,8 @@ bool InputType::Equals(const InputType& other) const {
return false;
}
switch (kind_) {
case InputType::ANY_TYPE:
return true;
case InputType::EXACT_TYPE:
return type_->Equals(*other.type_);
case InputType::USE_TYPE_MATCHER:
Expand Down
9 changes: 9 additions & 0 deletions cpp/src/arrow/compute/kernel_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,15 @@ TEST(InputType, Constructors) {

InputType ty7(match::TimestampTypeUnit(TimeUnit::MICRO));
ASSERT_EQ("any[timestamp(us)]", ty7.ToString());

InputType ty8;
InputType ty9(ValueDescr::ANY);
InputType ty10(ValueDescr::ARRAY);
InputType ty11(ValueDescr::SCALAR);
ASSERT_EQ("any[any]", ty8.ToString());
ASSERT_EQ("any[any]", ty9.ToString());
ASSERT_EQ("array[any]", ty10.ToString());
ASSERT_EQ("scalar[any]", ty11.ToString());
}

TEST(InputType, Equals) {
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ struct MinMaxImpl : public ScalarAggregator {
local.has_nulls = null_count > 0;
local.has_values = (arr.length() - null_count) > 0;

if (local.has_nulls && options.null_handling == MinMaxOptions::OUTPUT_NULL) {
if (local.has_nulls && options.null_handling == MinMaxOptions::EMIT_NULL) {
this->state = local;
return;
}
Expand All @@ -443,7 +443,7 @@ struct MinMaxImpl : public ScalarAggregator {

std::vector<std::shared_ptr<Scalar>> values;
if (!state.has_values ||
(state.has_nulls && options.null_handling == MinMaxOptions::OUTPUT_NULL)) {
(state.has_nulls && options.null_handling == MinMaxOptions::EMIT_NULL)) {
// (null, null)
values = {std::make_shared<ScalarType>(), std::make_shared<ScalarType>()};
} else {
Expand Down Expand Up @@ -533,7 +533,7 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {

local.has_nulls = null_count > 0;
local.has_values = valid_count > 0;
if (local.has_nulls && options.null_handling == MinMaxOptions::OUTPUT_NULL) {
if (local.has_nulls && options.null_handling == MinMaxOptions::EMIT_NULL) {
this->state = local;
return;
}
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/compute/kernels/aggregate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ TEST_F(TestBooleanMinMaxKernel, Basics) {
this->AssertMinMaxIs(chunked_input2, false, false, options);
this->AssertMinMaxIs(chunked_input3, false, true, options);

options = MinMaxOptions(MinMaxOptions::OUTPUT_NULL);
options = MinMaxOptions(MinMaxOptions::EMIT_NULL);
this->AssertMinMaxIsNull("[]", options);
this->AssertMinMaxIsNull("[null, null, null]", options);
this->AssertMinMaxIsNull("[false, null, false]", options);
Expand Down Expand Up @@ -543,7 +543,7 @@ TYPED_TEST(TestIntegerMinMaxKernel, Basics) {
this->AssertMinMaxIs(chunked_input2, 1, 9, options);
this->AssertMinMaxIs(chunked_input3, 1, 9, options);

options = MinMaxOptions(MinMaxOptions::OUTPUT_NULL);
options = MinMaxOptions(MinMaxOptions::EMIT_NULL);
this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
// output null
this->AssertMinMaxIsNull("[5, null, 2, 3, 4]", options);
Expand All @@ -570,7 +570,7 @@ TYPED_TEST(TestFloatingMinMaxKernel, Floats) {
this->AssertMinMaxIs(chunked_input2, 1, 9, options);
this->AssertMinMaxIs(chunked_input3, 1, 9, options);

options = MinMaxOptions(MinMaxOptions::OUTPUT_NULL);
options = MinMaxOptions(MinMaxOptions::EMIT_NULL);
this->AssertMinMaxIs("[5, 1, 2, 3, 4]", 1, 5, options);
this->AssertMinMaxIs("[5, -Inf, 2, 3, 4]", -INFINITY, 5, options);
// output null
Expand Down
31 changes: 18 additions & 13 deletions cpp/src/arrow/compute/kernels/scalar_set_lookup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,9 @@ class IsInMetaBinary : public MetaFunction {
Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
const FunctionOptions* options,
ExecContext* ctx) const override {
DCHECK_EQ(options, nullptr);
if (options != nullptr) {
return Status::Invalid("Unexpected options for 'is_in_meta_binary' function");
}
return IsIn(args[0], args[1], ctx);
}
};
Expand All @@ -417,7 +419,9 @@ class IndexInMetaBinary : public MetaFunction {
Result<Datum> ExecuteImpl(const std::vector<Datum>& args,
const FunctionOptions* options,
ExecContext* ctx) const override {
DCHECK_EQ(options, nullptr);
if (options != nullptr) {
return Status::Invalid("Unexpected options for 'index_in_meta_binary' function");
}
return IndexIn(args[0], args[1], ctx);
}
};
Expand All @@ -444,17 +448,18 @@ void RegisterScalarSetLookup(FunctionRegistry* registry) {

// IndexIn uses Int32Builder and so is responsible for all its own allocation
{
ScalarKernel match_base;
match_base.init = InitSetLookup;
match_base.exec = ExecIndexIn;
match_base.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
match_base.mem_allocation = MemAllocation::NO_PREALLOCATE;
auto match = std::make_shared<ScalarFunction>("index_in", Arity::Unary());
AddBasicSetLookupKernels(match_base, /*output_type=*/int32(), match.get());

match_base.signature = KernelSignature::Make({null()}, int32());
DCHECK_OK(match->AddKernel(match_base));
DCHECK_OK(registry->AddFunction(match));
ScalarKernel index_in_base;
index_in_base.init = InitSetLookup;
index_in_base.exec = ExecIndexIn;
index_in_base.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
index_in_base.mem_allocation = MemAllocation::NO_PREALLOCATE;
auto index_in = std::make_shared<ScalarFunction>("index_in", Arity::Unary());

AddBasicSetLookupKernels(index_in_base, /*output_type=*/int32(), index_in.get());

index_in_base.signature = KernelSignature::Make({null()}, int32());
DCHECK_OK(index_in->AddKernel(index_in_base));
DCHECK_OK(registry->AddFunction(index_in));

DCHECK_OK(registry->AddFunction(std::make_shared<IndexInMetaBinary>()));
}
Expand Down
2 changes: 1 addition & 1 deletion docs/source/cpp/compute.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ Some functions accept or require an options structure that determines the
exact semantics of the function::

MinMaxOptions options;
options.null_handling = MinMaxOptions::OUTPUT_NULL;
options.null_handling = MinMaxOptions::EMIT_NULL;

std::shared_ptr<arrow::Array> array = ...;
arrow::Datum min_max_datum;
Expand Down
Loading