Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cpp/src/arrow/compute/exec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,9 @@ class KernelExecutorImpl : public KernelExecutor {
if (validity_preallocated_) {
ARROW_ASSIGN_OR_RAISE(out->buffers[0], kernel_ctx_->AllocateBitmap(length));
}
if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) {
out->null_count = 0;
}
for (size_t i = 0; i < data_preallocated_.size(); ++i) {
const auto& prealloc = data_preallocated_[i];
if (prealloc.bit_width >= 0) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/kernels/util_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ PrimitiveArg GetPrimitiveArg(const ArrayData& arr) {
arg.data += arr.offset * arg.bit_width / 8;
}
// This may be kUnknownNullCount
arg.null_count = arr.null_count.load();
arg.null_count = (arg.is_valid != nullptr) ? arr.null_count.load() : 0;
return arg;
}

Expand Down
82 changes: 65 additions & 17 deletions cpp/src/arrow/compute/kernels/vector_selection_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -947,10 +947,37 @@ uint64_t GetMaxIndex(int64_t values_length) {
return static_cast<uint64_t>(values_length - 1);
}

class TestTakeKernel : public ::testing::Test {
public:
void TestNoValidityBitmapButUnknownNullCount(const std::shared_ptr<Array>& values,
const std::shared_ptr<Array>& indices) {
ASSERT_EQ(values->null_count(), 0);
ASSERT_EQ(indices->null_count(), 0);
auto expected = (*Take(values, indices)).make_array();

auto new_values = MakeArray(values->data()->Copy());
new_values->data()->buffers[0].reset();
new_values->data()->null_count = kUnknownNullCount;
auto new_indices = MakeArray(indices->data()->Copy());
new_indices->data()->buffers[0].reset();
new_indices->data()->null_count = kUnknownNullCount;
auto result = (*Take(new_values, new_indices)).make_array();

AssertArraysEqual(*expected, *result);
}

void TestNoValidityBitmapButUnknownNullCount(const std::shared_ptr<DataType>& type,
const std::string& values,
const std::string& indices) {
TestNoValidityBitmapButUnknownNullCount(ArrayFromJSON(type, values),
ArrayFromJSON(int16(), indices));
}
};

template <typename ArrowType>
class TestTakeKernel : public ::testing::Test {};
class TestTakeKernelTyped : public TestTakeKernel {};

TEST(TestTakeKernel, TakeNull) {
TEST_F(TestTakeKernel, TakeNull) {
AssertTakeNull("[null, null, null]", "[0, 1, 0]", "[null, null, null]");
AssertTakeNull("[null, null, null]", "[0, 2]", "[null, null]");

Expand All @@ -961,13 +988,13 @@ TEST(TestTakeKernel, TakeNull) {
TakeJSON(boolean(), "[null, null, null]", int8(), "[0, -1, 0]", &arr));
}

TEST(TestTakeKernel, InvalidIndexType) {
TEST_F(TestTakeKernel, InvalidIndexType) {
std::shared_ptr<Array> arr;
ASSERT_RAISES(NotImplemented, TakeJSON(null(), "[null, null, null]", float32(),
"[0.0, 1.0, 0.1]", &arr));
}

TEST(TestTakeKernel, DefaultOptions) {
TEST_F(TestTakeKernel, DefaultOptions) {
auto indices = ArrayFromJSON(int8(), "[null, 2, 0, 3]");
auto values = ArrayFromJSON(int8(), "[7, 8, 9, null]");
ASSERT_OK_AND_ASSIGN(auto no_options_provided, CallFunction("take", {values, indices}));
Expand All @@ -979,12 +1006,14 @@ TEST(TestTakeKernel, DefaultOptions) {
AssertDatumsEqual(explicit_defaults, no_options_provided);
}

TEST(TestTakeKernel, TakeBoolean) {
TEST_F(TestTakeKernel, TakeBoolean) {
AssertTakeBoolean("[7, 8, 9]", "[]", "[]");
AssertTakeBoolean("[true, false, true]", "[0, 1, 0]", "[true, false, true]");
AssertTakeBoolean("[null, false, true]", "[0, 1, 0]", "[null, false, null]");
AssertTakeBoolean("[true, false, true]", "[null, 1, 0]", "[null, false, true]");

TestNoValidityBitmapButUnknownNullCount(boolean(), "[true, false, true]", "[1, 0, 0]");

std::shared_ptr<Array> arr;
ASSERT_RAISES(IndexError,
TakeJSON(boolean(), "[true, false, true]", int8(), "[0, 9, 0]", &arr));
Expand All @@ -993,7 +1022,7 @@ TEST(TestTakeKernel, TakeBoolean) {
}

template <typename ArrowType>
class TestTakeKernelWithNumeric : public TestTakeKernel<ArrowType> {
class TestTakeKernelWithNumeric : public TestTakeKernelTyped<ArrowType> {
protected:
void AssertTake(const std::string& values, const std::string& indices,
const std::string& expected) {
Expand Down Expand Up @@ -1022,7 +1051,7 @@ TYPED_TEST(TestTakeKernelWithNumeric, TakeNumeric) {
}

template <typename TypeClass>
class TestTakeKernelWithString : public TestTakeKernel<TypeClass> {
class TestTakeKernelWithString : public TestTakeKernelTyped<TypeClass> {
public:
std::shared_ptr<DataType> value_type() {
return TypeTraits<TypeClass>::type_singleton();
Expand Down Expand Up @@ -1057,6 +1086,9 @@ TYPED_TEST(TestTakeKernelWithString, TakeString) {
this->AssertTake(R"([null, "b", "c"])", "[0, 1, 0]", "[null, \"b\", null]");
this->AssertTake(R"(["a", "b", "c"])", "[null, 1, 0]", R"([null, "b", "a"])");

this->TestNoValidityBitmapButUnknownNullCount(this->value_type(), R"(["a", "b", "c"])",
"[0, 1, 0]");

std::shared_ptr<DataType> type = this->value_type();
std::shared_ptr<Array> arr;
ASSERT_RAISES(IndexError,
Expand All @@ -1072,7 +1104,7 @@ TYPED_TEST(TestTakeKernelWithString, TakeDictionary) {
this->AssertTakeDictionary(dict, "[3, 4, 2]", "[null, 1, 0]", "[null, 4, 3]");
}

class TestTakeKernelFSB : public TestTakeKernel<FixedSizeBinaryType> {
class TestTakeKernelFSB : public TestTakeKernelTyped<FixedSizeBinaryType> {
public:
std::shared_ptr<DataType> value_type() { return fixed_size_binary(3); }

Expand All @@ -1087,6 +1119,9 @@ TEST_F(TestTakeKernelFSB, TakeFixedSizeBinary) {
this->AssertTake(R"([null, "bbb", "ccc"])", "[0, 1, 0]", "[null, \"bbb\", null]");
this->AssertTake(R"(["aaa", "bbb", "ccc"])", "[null, 1, 0]", R"([null, "bbb", "aaa"])");

this->TestNoValidityBitmapButUnknownNullCount(this->value_type(),
R"(["aaa", "bbb", "ccc"])", "[0, 1, 0]");

std::shared_ptr<DataType> type = this->value_type();
std::shared_ptr<Array> arr;
ASSERT_RAISES(IndexError,
Expand All @@ -1095,7 +1130,7 @@ TEST_F(TestTakeKernelFSB, TakeFixedSizeBinary) {
int64(), "[2, 5]", &arr));
}

class TestTakeKernelWithList : public TestTakeKernel<ListType> {};
class TestTakeKernelWithList : public TestTakeKernelTyped<ListType> {};

TEST_F(TestTakeKernelWithList, TakeListInt32) {
std::string list_json = "[[], [1,2], null, [3]]";
Expand All @@ -1107,6 +1142,9 @@ TEST_F(TestTakeKernelWithList, TakeListInt32) {
CheckTake(list(int32()), list_json, "[0, 1, 2, 3]", list_json);
CheckTake(list(int32()), list_json, "[0, 0, 0, 0, 0, 0, 1]",
"[[], [], [], [], [], [], [1, 2]]");

this->TestNoValidityBitmapButUnknownNullCount(list(int32()), "[[], [1,2], [3]]",
"[0, 1, 0]");
}

TEST_F(TestTakeKernelWithList, TakeListListInt32) {
Expand Down Expand Up @@ -1134,17 +1172,20 @@ TEST_F(TestTakeKernelWithList, TakeListListInt32) {
CheckTake(type, list_json, "[0, 1, 2, 3]", list_json);
CheckTake(type, list_json, "[0, 0, 0, 0, 0, 0, 1]",
"[[], [], [], [], [], [], [[1], [2, null, 2], []]]");

this->TestNoValidityBitmapButUnknownNullCount(
type, "[[[1], [2, null, 2], []], [[3, null]]]", "[0, 1, 0]");
}

class TestTakeKernelWithLargeList : public TestTakeKernel<LargeListType> {};
class TestTakeKernelWithLargeList : public TestTakeKernelTyped<LargeListType> {};

TEST_F(TestTakeKernelWithLargeList, TakeLargeListInt32) {
std::string list_json = "[[], [1,2], null, [3]]";
CheckTake(large_list(int32()), list_json, "[]", "[]");
CheckTake(large_list(int32()), list_json, "[null, 1, 2, 0]", "[null, [1,2], null, []]");
}

class TestTakeKernelWithFixedSizeList : public TestTakeKernel<FixedSizeListType> {};
class TestTakeKernelWithFixedSizeList : public TestTakeKernelTyped<FixedSizeListType> {};

TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListInt32) {
std::string list_json = "[null, [1, null, 3], [4, 5, 6], [7, 8, null]]";
Expand All @@ -1160,9 +1201,13 @@ TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListInt32) {
CheckTake(
fixed_size_list(int32(), 3), list_json, "[2, 2, 2, 2, 2, 2, 1]",
"[[4, 5, 6], [4, 5, 6], [4, 5, 6], [4, 5, 6], [4, 5, 6], [4, 5, 6], [1, null, 3]]");

this->TestNoValidityBitmapButUnknownNullCount(fixed_size_list(int32(), 3),
"[[1, null, 3], [4, 5, 6], [7, 8, null]]",
"[0, 1, 0]");
}

class TestTakeKernelWithMap : public TestTakeKernel<MapType> {};
class TestTakeKernelWithMap : public TestTakeKernelTyped<MapType> {};

TEST_F(TestTakeKernelWithMap, TakeMapStringToInt32) {
std::string map_json = R"([
Expand Down Expand Up @@ -1196,7 +1241,7 @@ TEST_F(TestTakeKernelWithMap, TakeMapStringToInt32) {
])");
}

class TestTakeKernelWithStruct : public TestTakeKernel<StructType> {};
class TestTakeKernelWithStruct : public TestTakeKernelTyped<StructType> {};

TEST_F(TestTakeKernelWithStruct, TakeStruct) {
auto struct_type = struct_({field("a", int32()), field("b", utf8())});
Expand Down Expand Up @@ -1229,9 +1274,12 @@ TEST_F(TestTakeKernelWithStruct, TakeStruct) {
{"a": 2, "b": "hello"},
{"a": 2, "b": "hello"}
])");

this->TestNoValidityBitmapButUnknownNullCount(
struct_type, R"([{"a": 1}, {"a": 2, "b": "hello"}])", "[0, 1, 0]");
}

class TestTakeKernelWithUnion : public TestTakeKernel<UnionType> {};
class TestTakeKernelWithUnion : public TestTakeKernelTyped<UnionType> {};

// TODO: Restore Union take functionality
TEST_F(TestTakeKernelWithUnion, DISABLED_TakeUnion) {
Expand Down Expand Up @@ -1385,7 +1433,7 @@ TEST_F(TestPermutationsWithTake, InvertPermutation) {
}
}

class TestTakeKernelWithRecordBatch : public TestTakeKernel<RecordBatch> {
class TestTakeKernelWithRecordBatch : public TestTakeKernelTyped<RecordBatch> {
public:
void AssertTake(const std::shared_ptr<Schema>& schm, const std::string& batch_json,
const std::string& indices, const std::string& expected_batch) {
Expand Down Expand Up @@ -1444,7 +1492,7 @@ TEST_F(TestTakeKernelWithRecordBatch, TakeRecordBatch) {
])");
}

class TestTakeKernelWithChunkedArray : public TestTakeKernel<ChunkedArray> {
class TestTakeKernelWithChunkedArray : public TestTakeKernelTyped<ChunkedArray> {
public:
void AssertTake(const std::shared_ptr<DataType>& type,
const std::vector<std::string>& values, const std::string& indices,
Expand Down Expand Up @@ -1501,7 +1549,7 @@ TEST_F(TestTakeKernelWithChunkedArray, TakeChunkedArray) {
{"[0, 1, 0]", "[5, 1]"}, &arr));
}

class TestTakeKernelWithTable : public TestTakeKernel<Table> {
class TestTakeKernelWithTable : public TestTakeKernelTyped<Table> {
public:
void AssertTake(const std::shared_ptr<Schema>& schm,
const std::vector<std::string>& table_json, const std::string& filter,
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/compute/kernels/vector_sort_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ class TestNthToIndices : public TestBase {
protected:
void AssertNthToIndicesArray(const std::shared_ptr<Array> values, int n) {
ASSERT_OK_AND_ASSIGN(std::shared_ptr<Array> offsets, NthToIndices(*values, n));
// null_count field should have been initialized to 0, for convenience
ASSERT_EQ(offsets->data()->null_count, 0);
ASSERT_OK(offsets->ValidateFull());
Validate<ArrayType>(*checked_pointer_cast<ArrayType>(values), n,
*checked_pointer_cast<UInt64Array>(offsets));
Expand Down