From eff44306f6299406b368355e9fb4fa8a798b80c5 Mon Sep 17 00:00:00 2001
From: "Uwe L. Korn"
Date: Fri, 23 Jun 2017 14:49:42 +0200
Subject: [PATCH] ARROW-1143: C++: Fix comparison of NullArray

ArrayEqualsVisitor::Visit(const NullArray&) now sets result_ to true
before returning OK, and BaseDataEquals only compares the null bitmaps
when 0 < null_count < length. Adds a C++ equality test for NullArray and
Python tests covering all-None columns.

Change-Id: Ib18dc6b00c9806aaf541c61cb63673ac51b0525c
---
 cpp/src/arrow/array-test.cc                 | 10 ++++++++++
 cpp/src/arrow/compare.cc                    |  7 +++++--
 python/pyarrow/tests/test_convert_pandas.py |  6 ++++++
 python/pyarrow/tests/test_parquet.py        |  6 +++++-
 4 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc
index beffa1b11cb..8f6323b7555 100644
--- a/cpp/src/arrow/array-test.cc
+++ b/cpp/src/arrow/array-test.cc
@@ -95,6 +95,16 @@ TEST_F(TestArray, TestEquality) {
   EXPECT_FALSE(array->RangeEquals(1, 2, 1, unequal_array));
 }
 
+TEST_F(TestArray, TestNullArrayEquality) {
+  auto array_1 = std::make_shared<NullArray>(10);
+  auto array_2 = std::make_shared<NullArray>(10);
+  auto array_3 = std::make_shared<NullArray>(20);
+
+  EXPECT_TRUE(array_1->Equals(array_1));
+  EXPECT_TRUE(array_1->Equals(array_2));
+  EXPECT_FALSE(array_1->Equals(array_3));
+}
+
 TEST_F(TestArray, SliceRecomputeNullCount) {
   vector<uint8_t> valid_bytes = {1, 0, 1, 1, 0, 1, 0, 0, 0};
 
diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 562d4e1b4dd..c2f4f845baf 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -322,7 +322,10 @@ class ArrayEqualsVisitor : public RangeEqualsVisitor {
   explicit ArrayEqualsVisitor(const Array& right)
       : RangeEqualsVisitor(right, 0, right.length(), 0) {}
 
-  Status Visit(const NullArray& left) { return Status::OK(); }
+  Status Visit(const NullArray& left) {
+    result_ = true;
+    return Status::OK();
+  }
 
   Status Visit(const BooleanArray& left) {
     const auto& right = static_cast<const BooleanArray&>(right_);
@@ -529,7 +532,7 @@ static bool BaseDataEquals(const Array& left, const Array& right) {
       left.type_id() != right.type_id()) {
     return false;
   }
-  if (left.null_count() > 0) {
+  if (left.null_count() > 0 && left.null_count() < left.length()) {
     return BitmapEquals(left.null_bitmap()->data(), left.offset(),
         right.null_bitmap()->data(), right.offset(), left.length());
   }
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index d17ef3c0ad1..f6ada09cc2c 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -103,6 +103,12 @@ def test_all_none_objects(self):
         self._check_pandas_roundtrip(df)
 
 
+    def test_all_none_category(self):
+        df = pd.DataFrame({'a': [None, None, None]})
+        df['a'] = df['a'].astype('category')
+        self._check_pandas_roundtrip(df)
+
+
     def test_float_no_nulls(self):
         data = {}
         fields = []
diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 052d395b981..7c2a0457d13 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -225,8 +225,12 @@ def _test_dataframe(size=10000, seed=0):
         'float32': np.random.randn(size).astype(np.float32),
         'float64': np.arange(size, dtype=np.float64),
         'bool': np.random.randn(size) > 0,
-        'strings': [tm.rands(10) for i in range(size)]
+        'strings': [tm.rands(10) for i in range(size)],
+        'all_none': [None] * size,
+        'all_none_category': [None] * size
     })
+    # TODO(PARQUET-1015)
+    # df['all_none_category'] = df['all_none_category'].astype('category')
     return df
 
 