diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index ecf3d6962f5..6443c96e918 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -473,6 +473,9 @@ class KernelExecutorImpl : public KernelExecutor { if (validity_preallocated_) { ARROW_ASSIGN_OR_RAISE(out->buffers[0], kernel_ctx_->AllocateBitmap(length)); } + if (kernel_->null_handling == NullHandling::OUTPUT_NOT_NULL) { + out->null_count = 0; + } for (size_t i = 0; i < data_preallocated_.size(); ++i) { const auto& prealloc = data_preallocated_[i]; if (prealloc.bit_width >= 0) { diff --git a/cpp/src/arrow/compute/kernels/util_internal.cc b/cpp/src/arrow/compute/kernels/util_internal.cc index 93badbd3b25..1656ed2aaf3 100644 --- a/cpp/src/arrow/compute/kernels/util_internal.cc +++ b/cpp/src/arrow/compute/kernels/util_internal.cc @@ -53,7 +53,7 @@ PrimitiveArg GetPrimitiveArg(const ArrayData& arr) { arg.data += arr.offset * arg.bit_width / 8; } // This may be kUnknownNullCount - arg.null_count = arr.null_count.load(); + arg.null_count = (arg.is_valid != nullptr) ? arr.null_count.load() : 0; return arg; } diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index 0785a1d602e..cf52870ed89 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -947,10 +947,37 @@ uint64_t GetMaxIndex(int64_t values_length) { return static_cast(values_length - 1); } +class TestTakeKernel : public ::testing::Test { + public: + void TestNoValidityBitmapButUnknownNullCount(const std::shared_ptr& values, + const std::shared_ptr& indices) { + ASSERT_EQ(values->null_count(), 0); + ASSERT_EQ(indices->null_count(), 0); + auto expected = (*Take(values, indices)).make_array(); + + auto new_values = MakeArray(values->data()->Copy()); + new_values->data()->buffers[0].reset(); + new_values->data()->null_count = kUnknownNullCount; + auto new_indices = MakeArray(indices->data()->Copy()); + new_indices->data()->buffers[0].reset(); + new_indices->data()->null_count = kUnknownNullCount; + auto result = (*Take(new_values, new_indices)).make_array(); + + AssertArraysEqual(*expected, *result); + } + + void TestNoValidityBitmapButUnknownNullCount(const std::shared_ptr& type, + const std::string& values, + const std::string& indices) { + TestNoValidityBitmapButUnknownNullCount(ArrayFromJSON(type, values), + ArrayFromJSON(int16(), indices)); + } +}; + template -class TestTakeKernel : public ::testing::Test {}; +class TestTakeKernelTyped : public TestTakeKernel {}; -TEST(TestTakeKernel, TakeNull) { +TEST_F(TestTakeKernel, TakeNull) { AssertTakeNull("[null, null, null]", "[0, 1, 0]", "[null, null, null]"); AssertTakeNull("[null, null, null]", "[0, 2]", "[null, null]"); @@ -961,13 +988,13 @@ TEST(TestTakeKernel, TakeNull) { TakeJSON(boolean(), "[null, null, null]", int8(), "[0, -1, 0]", &arr)); } -TEST(TestTakeKernel, InvalidIndexType) { +TEST_F(TestTakeKernel, InvalidIndexType) { std::shared_ptr arr; ASSERT_RAISES(NotImplemented, TakeJSON(null(), "[null, null, null]", float32(), "[0.0, 1.0, 0.1]", &arr)); } -TEST(TestTakeKernel, DefaultOptions) { +TEST_F(TestTakeKernel, DefaultOptions) { auto indices = ArrayFromJSON(int8(), "[null, 2, 0, 3]"); auto values = ArrayFromJSON(int8(), "[7, 8, 9, null]"); ASSERT_OK_AND_ASSIGN(auto no_options_provided, CallFunction("take", {values, indices})); @@ -979,12 +1006,14 @@ TEST(TestTakeKernel, DefaultOptions) { AssertDatumsEqual(explicit_defaults, no_options_provided); } -TEST(TestTakeKernel, TakeBoolean) { +TEST_F(TestTakeKernel, TakeBoolean) { AssertTakeBoolean("[7, 8, 9]", "[]", "[]"); AssertTakeBoolean("[true, false, true]", "[0, 1, 0]", "[true, false, true]"); AssertTakeBoolean("[null, false, true]", "[0, 1, 0]", "[null, false, null]"); AssertTakeBoolean("[true, false, true]", "[null, 1, 0]", "[null, false, true]"); + TestNoValidityBitmapButUnknownNullCount(boolean(), "[true, false, true]", "[1, 0, 0]"); + std::shared_ptr arr; ASSERT_RAISES(IndexError, TakeJSON(boolean(), "[true, false, true]", int8(), "[0, 9, 0]", &arr)); @@ -993,7 +1022,7 @@ TEST(TestTakeKernel, TakeBoolean) { } template -class TestTakeKernelWithNumeric : public TestTakeKernel { +class TestTakeKernelWithNumeric : public TestTakeKernelTyped { protected: void AssertTake(const std::string& values, const std::string& indices, const std::string& expected) { @@ -1022,7 +1051,7 @@ TYPED_TEST(TestTakeKernelWithNumeric, TakeNumeric) { } template -class TestTakeKernelWithString : public TestTakeKernel { +class TestTakeKernelWithString : public TestTakeKernelTyped { public: std::shared_ptr value_type() { return TypeTraits::type_singleton(); @@ -1057,6 +1086,9 @@ TYPED_TEST(TestTakeKernelWithString, TakeString) { this->AssertTake(R"([null, "b", "c"])", "[0, 1, 0]", "[null, \"b\", null]"); this->AssertTake(R"(["a", "b", "c"])", "[null, 1, 0]", R"([null, "b", "a"])"); + this->TestNoValidityBitmapButUnknownNullCount(this->value_type(), R"(["a", "b", "c"])", + "[0, 1, 0]"); + std::shared_ptr type = this->value_type(); std::shared_ptr arr; ASSERT_RAISES(IndexError, @@ -1072,7 +1104,7 @@ TYPED_TEST(TestTakeKernelWithString, TakeDictionary) { this->AssertTakeDictionary(dict, "[3, 4, 2]", "[null, 1, 0]", "[null, 4, 3]"); } -class TestTakeKernelFSB : public TestTakeKernel { +class TestTakeKernelFSB : public TestTakeKernelTyped { public: std::shared_ptr value_type() { return fixed_size_binary(3); } @@ -1087,6 +1119,9 @@ TEST_F(TestTakeKernelFSB, TakeFixedSizeBinary) { this->AssertTake(R"([null, "bbb", "ccc"])", "[0, 1, 0]", "[null, \"bbb\", null]"); this->AssertTake(R"(["aaa", "bbb", "ccc"])", "[null, 1, 0]", R"([null, "bbb", "aaa"])"); + this->TestNoValidityBitmapButUnknownNullCount(this->value_type(), + R"(["aaa", "bbb", "ccc"])", "[0, 1, 0]"); + std::shared_ptr type = this->value_type(); std::shared_ptr arr; ASSERT_RAISES(IndexError, @@ -1095,7 +1130,7 @@ TEST_F(TestTakeKernelFSB, TakeFixedSizeBinary) { int64(), "[2, 5]", &arr)); } -class TestTakeKernelWithList : public TestTakeKernel {}; +class TestTakeKernelWithList : public TestTakeKernelTyped {}; TEST_F(TestTakeKernelWithList, TakeListInt32) { std::string list_json = "[[], [1,2], null, [3]]"; @@ -1107,6 +1142,9 @@ TEST_F(TestTakeKernelWithList, TakeListInt32) { CheckTake(list(int32()), list_json, "[0, 1, 2, 3]", list_json); CheckTake(list(int32()), list_json, "[0, 0, 0, 0, 0, 0, 1]", "[[], [], [], [], [], [], [1, 2]]"); + + this->TestNoValidityBitmapButUnknownNullCount(list(int32()), "[[], [1,2], [3]]", + "[0, 1, 0]"); } TEST_F(TestTakeKernelWithList, TakeListListInt32) { @@ -1134,9 +1172,12 @@ TEST_F(TestTakeKernelWithList, TakeListListInt32) { CheckTake(type, list_json, "[0, 1, 2, 3]", list_json); CheckTake(type, list_json, "[0, 0, 0, 0, 0, 0, 1]", "[[], [], [], [], [], [], [[1], [2, null, 2], []]]"); + + this->TestNoValidityBitmapButUnknownNullCount( + type, "[[[1], [2, null, 2], []], [[3, null]]]", "[0, 1, 0]"); } -class TestTakeKernelWithLargeList : public TestTakeKernel {}; +class TestTakeKernelWithLargeList : public TestTakeKernelTyped {}; TEST_F(TestTakeKernelWithLargeList, TakeLargeListInt32) { std::string list_json = "[[], [1,2], null, [3]]"; @@ -1144,7 +1185,7 @@ TEST_F(TestTakeKernelWithLargeList, TakeLargeListInt32) { CheckTake(large_list(int32()), list_json, "[null, 1, 2, 0]", "[null, [1,2], null, []]"); } -class TestTakeKernelWithFixedSizeList : public TestTakeKernel {}; +class TestTakeKernelWithFixedSizeList : public TestTakeKernelTyped {}; TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListInt32) { std::string list_json = "[null, [1, null, 3], [4, 5, 6], [7, 8, null]]"; @@ -1160,9 +1201,13 @@ TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListInt32) { CheckTake( fixed_size_list(int32(), 3), list_json, "[2, 2, 2, 2, 2, 2, 1]", "[[4, 5, 6], [4, 5, 6], [4, 5, 6], [4, 5, 6], [4, 5, 6], [4, 5, 6], [1, null, 3]]"); + + this->TestNoValidityBitmapButUnknownNullCount(fixed_size_list(int32(), 3), + "[[1, null, 3], [4, 5, 6], [7, 8, null]]", + "[0, 1, 0]"); } -class TestTakeKernelWithMap : public TestTakeKernel {}; +class TestTakeKernelWithMap : public TestTakeKernelTyped {}; TEST_F(TestTakeKernelWithMap, TakeMapStringToInt32) { std::string map_json = R"([ @@ -1196,7 +1241,7 @@ TEST_F(TestTakeKernelWithMap, TakeMapStringToInt32) { ])"); } -class TestTakeKernelWithStruct : public TestTakeKernel {}; +class TestTakeKernelWithStruct : public TestTakeKernelTyped {}; TEST_F(TestTakeKernelWithStruct, TakeStruct) { auto struct_type = struct_({field("a", int32()), field("b", utf8())}); @@ -1229,9 +1274,12 @@ TEST_F(TestTakeKernelWithStruct, TakeStruct) { {"a": 2, "b": "hello"}, {"a": 2, "b": "hello"} ])"); + + this->TestNoValidityBitmapButUnknownNullCount( + struct_type, R"([{"a": 1}, {"a": 2, "b": "hello"}])", "[0, 1, 0]"); } -class TestTakeKernelWithUnion : public TestTakeKernel {}; +class TestTakeKernelWithUnion : public TestTakeKernelTyped {}; // TODO: Restore Union take functionality TEST_F(TestTakeKernelWithUnion, DISABLED_TakeUnion) { @@ -1385,7 +1433,7 @@ TEST_F(TestPermutationsWithTake, InvertPermutation) { } } -class TestTakeKernelWithRecordBatch : public TestTakeKernel { +class TestTakeKernelWithRecordBatch : public TestTakeKernelTyped { public: void AssertTake(const std::shared_ptr& schm, const std::string& batch_json, const std::string& indices, const std::string& expected_batch) { @@ -1444,7 +1492,7 @@ TEST_F(TestTakeKernelWithRecordBatch, TakeRecordBatch) { ])"); } -class TestTakeKernelWithChunkedArray : public TestTakeKernel { +class TestTakeKernelWithChunkedArray : public TestTakeKernelTyped { public: void AssertTake(const std::shared_ptr& type, const std::vector& values, const std::string& indices, @@ -1501,7 +1549,7 @@ TEST_F(TestTakeKernelWithChunkedArray, TakeChunkedArray) { {"[0, 1, 0]", "[5, 1]"}, &arr)); } -class TestTakeKernelWithTable : public TestTakeKernel { +class TestTakeKernelWithTable : public TestTakeKernelTyped
{ public: void AssertTake(const std::shared_ptr& schm, const std::vector& table_json, const std::string& filter, diff --git a/cpp/src/arrow/compute/kernels/vector_sort_test.cc b/cpp/src/arrow/compute/kernels/vector_sort_test.cc index 0c9cad508ef..cbeaacf39a8 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort_test.cc @@ -132,6 +132,8 @@ class TestNthToIndices : public TestBase { protected: void AssertNthToIndicesArray(const std::shared_ptr values, int n) { ASSERT_OK_AND_ASSIGN(std::shared_ptr offsets, NthToIndices(*values, n)); + // null_count field should have been initialized to 0, for convenience + ASSERT_EQ(offsets->data()->null_count, 0); ASSERT_OK(offsets->ValidateFull()); Validate(*checked_pointer_cast(values), n, *checked_pointer_cast(offsets));