Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cpp/src/arrow/compute/kernels/hash_aggregate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -483,9 +483,11 @@ struct GroupedCountImpl : public GroupedAggregator {
const auto& input = batch[0].array();

if (options_.count_mode == CountOptions::COUNT_NULL) {
for (int64_t i = 0, input_i = input->offset; i < input->length; ++i, ++input_i) {
auto g = group_ids[i];
raw_counts[g] += !BitUtil::GetBit(input->buffers[0]->data(), input_i);
if (input->GetNullCount() != 0) {
for (int64_t i = 0, input_i = input->offset; i < input->length; ++i, ++input_i) {
auto g = group_ids[i];
raw_counts[g] += !BitUtil::GetBit(input->buffers[0]->data(), input_i);
}
}
return Status::OK();
}
Expand Down
23 changes: 23 additions & 0 deletions cpp/src/arrow/compute/kernels/hash_aggregate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,29 @@ TEST(GroupBy, ConcreteCaseWithValidateGroupBy) {
}
}

// Count nulls/non_nulls from record batch with no nulls
TEST(GroupBy, CountNull) {
auto batch = RecordBatchFromJSON(
schema({field("argument", float64()), field("key", utf8())}), R"([
[1.0, "alfa"],
[2.0, "beta"],
[3.0, "gama"]
])");

CountOptions count_non_null{CountOptions::COUNT_NON_NULL},
count_null{CountOptions::COUNT_NULL};

using internal::Aggregate;
for (auto agg : {
Aggregate{"hash_count", &count_non_null},
Aggregate{"hash_count", &count_null},
}) {
SCOPED_TRACE(agg.function);
ValidateGroupBy({agg}, {batch->GetColumnByName("argument")},
{batch->GetColumnByName("key")});
}
}

TEST(GroupBy, RandomArraySum) {
for (int64_t length : {1 << 10, 1 << 12, 1 << 15}) {
for (auto null_probability : {0.0, 0.01, 0.5, 1.0}) {
Expand Down
72 changes: 54 additions & 18 deletions cpp/src/arrow/compute/kernels/scalar_boolean.cc
Original file line number Diff line number Diff line change
Expand Up @@ -169,21 +169,32 @@ struct KleeneAnd : Commutative<KleeneAnd> {
bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;

if (right_false) {
GetBitmap(*out, 0).SetBitsTo(true);
out->null_count = 0;
out->buffers[0] = nullptr;
GetBitmap(*out, 1).SetBitsTo(false); // all false case
return Status::OK();
}

if (right_true) {
GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
if (left.GetNullCount() == 0) {
out->null_count = 0;
out->buffers[0] = nullptr;
} else {
GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
}
GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
return Status::OK();
}

// scalar was null: out[i] is valid iff left[i] was false
::arrow::internal::BitmapAndNot(left.buffers[0]->data(), left.offset,
left.buffers[1]->data(), left.offset, left.length,
out->offset, out->buffers[0]->mutable_data());
if (left.GetNullCount() == 0) {
::arrow::internal::InvertBitmap(left.buffers[1]->data(), left.offset, left.length,
out->buffers[0]->mutable_data(), out->offset);
} else {
::arrow::internal::BitmapAndNot(left.buffers[0]->data(), left.offset,
left.buffers[1]->data(), left.offset, left.length,
out->offset, out->buffers[0]->mutable_data());
}
::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
out->buffers[1]->mutable_data(), out->offset);
return Status::OK();
Expand All @@ -192,7 +203,8 @@ struct KleeneAnd : Commutative<KleeneAnd> {
static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
ArrayData* out) {
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
GetBitmap(*out, 0).SetBitsTo(true);
out->null_count = 0;
out->buffers[0] = nullptr;
return And::Call(ctx, left, right, out);
}
auto compute_word = [](uint64_t left_true, uint64_t left_false, uint64_t right_true,
Expand Down Expand Up @@ -260,21 +272,32 @@ struct KleeneOr : Commutative<KleeneOr> {
bool right_false = right.is_valid && !checked_cast<const BooleanScalar&>(right).value;

if (right_true) {
GetBitmap(*out, 0).SetBitsTo(true);
out->null_count = 0;
out->buffers[0] = nullptr;
GetBitmap(*out, 1).SetBitsTo(true); // all true case
return Status::OK();
}

if (right_false) {
GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
if (left.GetNullCount() == 0) {
out->null_count = 0;
out->buffers[0] = nullptr;
} else {
GetBitmap(*out, 0).CopyFrom(GetBitmap(left, 0));
}
GetBitmap(*out, 1).CopyFrom(GetBitmap(left, 1));
return Status::OK();
}

// scalar was null: out[i] is valid iff left[i] was true
::arrow::internal::BitmapAnd(left.buffers[0]->data(), left.offset,
left.buffers[1]->data(), left.offset, left.length,
out->offset, out->buffers[0]->mutable_data());
if (left.GetNullCount() == 0) {
::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
out->buffers[0]->mutable_data(), out->offset);
} else {
::arrow::internal::BitmapAnd(left.buffers[0]->data(), left.offset,
left.buffers[1]->data(), left.offset, left.length,
out->offset, out->buffers[0]->mutable_data());
}
::arrow::internal::CopyBitmap(left.buffers[1]->data(), left.offset, left.length,
out->buffers[1]->mutable_data(), out->offset);
return Status::OK();
Expand All @@ -283,7 +306,8 @@ struct KleeneOr : Commutative<KleeneOr> {
static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
ArrayData* out) {
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
GetBitmap(*out, 0).SetBitsTo(true);
out->null_count = 0;
out->buffers[0] = nullptr;
return Or::Call(ctx, left, right, out);
}

Expand Down Expand Up @@ -373,21 +397,32 @@ struct KleeneAndNot {
bool left_false = left.is_valid && !checked_cast<const BooleanScalar&>(left).value;

if (left_false) {
GetBitmap(*out, 0).SetBitsTo(true);
out->null_count = 0;
out->buffers[0] = nullptr;
GetBitmap(*out, 1).SetBitsTo(false); // all false case
return Status::OK();
}

if (left_true) {
GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0));
if (right.GetNullCount() == 0) {
out->null_count = 0;
out->buffers[0] = nullptr;
} else {
GetBitmap(*out, 0).CopyFrom(GetBitmap(right, 0));
}
GetBitmap(*out, 1).CopyFromInverted(GetBitmap(right, 1));
return Status::OK();
}

// scalar was null: out[i] is valid iff right[i] was true
::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
right.buffers[1]->data(), right.offset, right.length,
out->offset, out->buffers[0]->mutable_data());
if (right.GetNullCount() == 0) {
::arrow::internal::CopyBitmap(right.buffers[1]->data(), right.offset, right.length,
out->buffers[0]->mutable_data(), out->offset);
} else {
::arrow::internal::BitmapAnd(right.buffers[0]->data(), right.offset,
right.buffers[1]->data(), right.offset, right.length,
out->offset, out->buffers[0]->mutable_data());
}
::arrow::internal::InvertBitmap(right.buffers[1]->data(), right.offset, right.length,
out->buffers[1]->mutable_data(), out->offset);
return Status::OK();
Expand All @@ -401,7 +436,8 @@ struct KleeneAndNot {
static Status Call(KernelContext* ctx, const ArrayData& left, const ArrayData& right,
ArrayData* out) {
if (left.GetNullCount() == 0 && right.GetNullCount() == 0) {
GetBitmap(*out, 0).SetBitsTo(true);
out->null_count = 0;
out->buffers[0] = nullptr;
return AndNot::Call(ctx, left, right, out);
}

Expand Down
18 changes: 18 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_boolean_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,12 @@ TEST(TestBooleanKernel, KleeneAnd) {
expected = ArrayFromJSON(boolean(), "[true, false, false, null, false]");
CheckScalarBinary("and_kleene", left, right, expected);
CheckBooleanScalarArrayBinary("and_kleene", left);

left = ArrayFromJSON(boolean(), " [true, true, false, true]");
right = ArrayFromJSON(boolean(), " [true, false, false, false]");
expected = ArrayFromJSON(boolean(), "[true, false, false, false]");
CheckScalarBinary("and_kleene", left, right, expected);
CheckBooleanScalarArrayBinary("and_kleene", left);
}

TEST(TestBooleanKernel, KleeneAndNot) {
Expand All @@ -121,6 +127,12 @@ TEST(TestBooleanKernel, KleeneAndNot) {
boolean(), "[false, true, null, false, false, false, false, null, null]");
CheckScalarBinary("and_not_kleene", left, right, expected);
CheckBooleanScalarArrayBinary("and_not_kleene", left);

left = ArrayFromJSON(boolean(), " [true, true, false, false]");
right = ArrayFromJSON(boolean(), " [true, false, true, false]");
expected = ArrayFromJSON(boolean(), "[false, true, false, false]");
CheckScalarBinary("and_not_kleene", left, right, expected);
CheckBooleanScalarArrayBinary("and_not_kleene", left);
}

TEST(TestBooleanKernel, KleeneOr) {
Expand All @@ -135,6 +147,12 @@ TEST(TestBooleanKernel, KleeneOr) {
expected = ArrayFromJSON(boolean(), "[true, true, false, true, null]");
CheckScalarBinary("or_kleene", left, right, expected);
CheckBooleanScalarArrayBinary("or_kleene", left);

left = ArrayFromJSON(boolean(), " [true, true, false, false]");
right = ArrayFromJSON(boolean(), " [true, false, false, true]");
expected = ArrayFromJSON(boolean(), "[true, true, false, true]");
CheckScalarBinary("or_kleene", left, right, expected);
CheckBooleanScalarArrayBinary("or_kleene", left);
}

} // namespace compute
Expand Down
8 changes: 5 additions & 3 deletions cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,11 @@ Status CastListExec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
Datum values = in_array.child_data[0];

if (in_array.offset != 0) {
ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
CopyBitmap(ctx->memory_pool(), in_array.buffers[0]->data(),
in_array.offset, in_array.length));
if (in_array.buffers[0]) {
ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
CopyBitmap(ctx->memory_pool(), in_array.buffers[0]->data(),
in_array.offset, in_array.length));
}
ARROW_ASSIGN_OR_RAISE(out_array->buffers[1],
ctx->Allocate(sizeof(offset_type) * (in_array.length + 1)));

Expand Down
14 changes: 14 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1702,6 +1702,20 @@ TEST(Cast, ListToList) {
CheckCast(MakeArray(list_float32), MakeArray(list_int32));
CheckCast(MakeArray(list_int64), MakeArray(list_int32));
}

// No nulls (ARROW-12568)
for (auto make_list : std::vector<make_list_t*>{&list, &large_list}) {
auto list_int32 = ArrayFromJSON(make_list(int32()),
"[[0], [1], [2, 3, 4], [5, 6], [], [7], [8, 9]]")
->data();
auto list_int64 = list_int32->Copy();
list_int64->type = make_list(int64());
list_int64->child_data[0] = Cast(list_int32->child_data[0], int64())->array();
ASSERT_OK(MakeArray(list_int64)->ValidateFull());

CheckCast(MakeArray(list_int32), MakeArray(list_int64));
CheckCast(MakeArray(list_int64), MakeArray(list_int32));
}
}

TEST(Cast, ListToListOptionsPassthru) {
Expand Down