From d4190cc9ad15d30cb8b840f8a6df25c006d8009f Mon Sep 17 00:00:00 2001
From: Yaron Gvili
Date: Fri, 7 Oct 2022 15:00:10 -0400
Subject: [PATCH 1/2] ARROW-17964: [C++] Range data comparison for struct type may go out of bounds

---
Reviewer notes (placed after the "---" marker, so not part of the commit
message):
 * The new guard makes Visit(const StructType&) set result_ to false and
   return when either side's child_data count differs from
   type.num_fields(), so the per-field loop in compare_runs does not index
   child_data[f] past the end.
 * The line structure and the static_cast<size_t> template argument were
   lost in extraction and have been restored here; confirm against the
   upstream commit before applying.

 cpp/src/arrow/compare.cc | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index c5406ee583f..d6eb6756284 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -305,6 +305,11 @@ class RangeDataEqualsImpl {
   Status Visit(const StructType& type) {
     const int32_t num_fields = type.num_fields();
 
+    if (left_.child_data.size() != static_cast<size_t>(num_fields) ||
+        right_.child_data.size() != static_cast<size_t>(num_fields)) {
+      result_ = false;
+      return Status::OK();
+    }
     auto compare_runs = [&](int64_t i, int64_t length) -> bool {
       for (int32_t f = 0; f < num_fields; ++f) {
         RangeDataEqualsImpl impl(options_, floating_approximate_, *left_.child_data[f],

From 62fdbe4d6f98bfe9a156642ffd8c30e4ea98fb4d Mon Sep 17 00:00:00 2001
From: Yaron Gvili
Date: Thu, 13 Oct 2022 09:13:52 -0400
Subject: [PATCH 2/2] add tests

---
Reviewer notes (placed after the "---" marker, so not part of the commit
message):
 * PrintDiff now runs ValidateFull() on both arrays, propagating a failing
   status via ARROW_RETURN_NOT_OK, before it compares their types.
 * TEST(Datum, Equality) covers equal and unequal struct arrays and chunked
   arrays, including a StructArray built with two children for a one-field
   struct type.
 * Template arguments (StructArray, std::shared_ptr<Array>) and the system
   include names were lost in extraction. <gtest/gtest-spi.h> follows from
   the new use of EXPECT_FATAL_FAILURE; the <memory>, <string> and
   <gtest/gtest.h> context lines are a best guess -- confirm against the
   upstream file before applying.

 cpp/src/arrow/compare.cc    |  3 +++
 cpp/src/arrow/datum_test.cc | 47 +++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index d6eb6756284..951e64f3dad 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -870,6 +870,9 @@ Status PrintDiff(const Array& left, const Array& right, int64_t left_offset,
     return Status::OK();
   }
 
+  ARROW_RETURN_NOT_OK(left.ValidateFull());
+  ARROW_RETURN_NOT_OK(right.ValidateFull());
+
   if (!left.type()->Equals(right.type())) {
     *os << "# Array types differed: " << *left.type() << " vs " << *right.type()
         << std::endl;
diff --git a/cpp/src/arrow/datum_test.cc b/cpp/src/arrow/datum_test.cc
index 8f962962a21..0918b04a87d 100644
--- a/cpp/src/arrow/datum_test.cc
+++ b/cpp/src/arrow/datum_test.cc
@@ -18,10 +18,12 @@
 #include <memory>
 #include <string>
 
+#include <gtest/gtest-spi.h>
 #include <gtest/gtest.h>
 
 #include "arrow/array/array_base.h"
 #include "arrow/array/array_binary.h"
+#include "arrow/array/array_nested.h"
 #include "arrow/chunked_array.h"
 #include "arrow/datum.h"
 #include "arrow/record_batch.h"
@@ -149,4 +151,49 @@ TEST(Datum, TotalBufferSize) {
   ASSERT_EQ(4, tab_datum.TotalBufferSize());
 }
 
+TEST(Datum, Equality) {
+  AssertDatumsEqual(ArrayFromJSON(struct_({field("a", int32())}), "[[0], [1], [2]]"),
+                    ArrayFromJSON(struct_({field("a", int32())}), "[[0], [1], [2]]"));
+  EXPECT_FATAL_FAILURE(
+      AssertDatumsEqual(ArrayFromJSON(struct_({field("a", int32())}), "[[0], [1], [2]]"),
+                        ArrayFromJSON(struct_({field("a", int64())}), "[[0], [1], [2]]")),
+      "Array types differed");
+  AssertDatumsEqual(ArrayFromJSON(struct_({field("a", int32()), field("b", int64())}),
+                                  "[[0, 0], [1, 1], [2, 2]]"),
+                    ArrayFromJSON(struct_({field("a", int32()), field("b", int64())}),
+                                  "[[0, 0], [1, 1], [2, 2]]"));
+  EXPECT_FATAL_FAILURE(
+      AssertDatumsEqual(ArrayFromJSON(struct_({field("a", int32()), field("b", int32())}),
+                                      "[[0, 0], [1, 1], [2, 2]]"),
+                        ArrayFromJSON(struct_({field("a", int32())}), "[[0], [1], [2]]")),
+      "Array types differed");
+  EXPECT_FATAL_FAILURE(
+      AssertDatumsEqual(ArrayFromJSON(struct_({field("a", int32())}), "[[0], [1], [2]]"),
+                        ArrayFromJSON(struct_({field("a", int32()), field("b", int32())}),
+                                      "[[0, 0], [1, 1], [2, 2]]")),
+      "Array types differed");
+  EXPECT_FATAL_FAILURE(
+      AssertDatumsEqual(
+          ChunkedArrayFromJSON(struct_({field("a", int32()), field("b", int32())}),
+                               {"[[0, 0], [1, 1], [2, 2]]"}),
+          ChunkedArrayFromJSON(struct_({field("a", int32())}), {"[[0], [1], [2]]"})),
+      "Failed");
+  EXPECT_FATAL_FAILURE(
+      AssertDatumsEqual(
+          ChunkedArrayFromJSON(struct_({field("a", int32())}), {"[[0], [1], [2]]"}),
+          ChunkedArrayFromJSON(struct_({field("a", int32()), field("b", int32())}),
+                               {"[[0, 0], [1, 1], [2, 2]]"})),
+      "Failed");
+
+  EXPECT_FATAL_FAILURE(
+      AssertDatumsEqual(
+          ArrayFromJSON(struct_({field("a", int32())}), "[[0], [1], [2]]"),
+          std::make_shared<StructArray>(
+              struct_({field("a", int32())}), 3,
+              std::vector<std::shared_ptr<Array>>{ArrayFromJSON(int32(), "[0, 1, 2]"),
+                                                  ArrayFromJSON(int32(), "[0, 1, 2]")},
+              NULLPTR, 0, 0)),
+      "Failed");
+}
+
 }  // namespace arrow