diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index d48f2d0229d..ed51f03f888 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -38,6 +38,15 @@ static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected,
   ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id());
 }
 
+static inline void AssertCOOIndex(
+    const std::shared_ptr<SparseCOOIndex::CoordinateTensor>& sidx, const int64_t nth,
+    const std::vector<int64_t>& expected_values) {
+  int64_t n = static_cast<int64_t>(expected_values.size());
+  for (int64_t i = 0; i < n; ++i) {
+    ASSERT_EQ(expected_values[i], sidx->Value({nth, i}));
+  }
+}
+
 TEST(TestSparseCOOTensor, CreationEmptyTensor) {
   std::vector<int64_t> shape = {2, 3, 4};
   SparseTensorImpl<SparseCOOIndex> st1(int64(), shape);
@@ -84,13 +93,8 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
   ASSERT_EQ("", st1.dim_name(1));
   ASSERT_EQ("", st1.dim_name(2));
 
-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
 
   const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
   ASSERT_EQ(std::string("SparseCOOIndex"), si.ToString());
@@ -99,30 +103,11 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
 
-  // (0, 0, 0) -> 1
-  ASSERT_EQ(0, sidx->Value({0, 0}));
-  ASSERT_EQ(0, sidx->Value({0, 1}));
-  ASSERT_EQ(0, sidx->Value({0, 2}));
-
-  // (0, 0, 2) -> 2
-  ASSERT_EQ(0, sidx->Value({1, 0}));
-  ASSERT_EQ(0, sidx->Value({1, 1}));
-  ASSERT_EQ(2, sidx->Value({1, 2}));
-
-  // (0, 1, 1) -> 3
-  ASSERT_EQ(0, sidx->Value({2, 0}));
-  ASSERT_EQ(1, sidx->Value({2, 1}));
-  ASSERT_EQ(1, sidx->Value({2, 2}));
-
-  // (1, 2, 1) -> 15
-  ASSERT_EQ(1, sidx->Value({10, 0}));
-  ASSERT_EQ(2, sidx->Value({10, 1}));
-  ASSERT_EQ(1, sidx->Value({10, 2}));
-
-  // (1, 2, 3) -> 16
-  ASSERT_EQ(1, sidx->Value({11, 0}));
-  ASSERT_EQ(2, sidx->Value({11, 1}));
-  ASSERT_EQ(3, sidx->Value({11, 2}));
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
 }
 
 TEST(TestSparseCOOTensor, CreationFromTensor) {
@@ -147,43 +132,47 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
   ASSERT_EQ("", st1.dim_name(1));
   ASSERT_EQ("", st1.dim_name(2));
 
-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
 
   const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
   std::shared_ptr<SparseCOOIndex::CoordinateTensor> sidx = si.indices();
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
 
-  // (0, 0, 0) -> 1
-  ASSERT_EQ(0, sidx->Value({0, 0}));
-  ASSERT_EQ(0, sidx->Value({0, 1}));
-  ASSERT_EQ(0, sidx->Value({0, 2}));
-
-  // (0, 0, 2) -> 2
-  ASSERT_EQ(0, sidx->Value({1, 0}));
-  ASSERT_EQ(0, sidx->Value({1, 1}));
-  ASSERT_EQ(2, sidx->Value({1, 2}));
-
-  // (0, 1, 1) -> 3
-  ASSERT_EQ(0, sidx->Value({2, 0}));
-  ASSERT_EQ(1, sidx->Value({2, 1}));
-  ASSERT_EQ(1, sidx->Value({2, 2}));
-
-  // (1, 2, 1) -> 15
-  ASSERT_EQ(1, sidx->Value({10, 0}));
-  ASSERT_EQ(2, sidx->Value({10, 1}));
-  ASSERT_EQ(1, sidx->Value({10, 2}));
-
-  // (1, 2, 3) -> 16
-  ASSERT_EQ(1, sidx->Value({11, 0}));
-  ASSERT_EQ(2, sidx->Value({11, 1}));
-  ASSERT_EQ(3, sidx->Value({11, 2}));
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
+}
+
+TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) {
+  std::vector<int64_t> shape = {2, 3, 4};
+  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+  std::vector<int64_t> strides = {192, 64, 16};
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, strides);
+  SparseTensorImpl<SparseCOOIndex> st(tensor);
+
+  ASSERT_EQ(12, st.non_zero_length());
+  ASSERT_TRUE(st.is_mutable());
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st.sparse_index());
+  std::shared_ptr<SparseCOOIndex::CoordinateTensor> sidx = si.indices();
+  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
+  ASSERT_TRUE(sidx->is_column_major());
+
+  AssertCOOIndex(sidx, 0, {0, 0, 0});
+  AssertCOOIndex(sidx, 1, {0, 0, 2});
+  AssertCOOIndex(sidx, 2, {0, 1, 1});
+  AssertCOOIndex(sidx, 10, {1, 2, 1});
+  AssertCOOIndex(sidx, 11, {1, 2, 3});
 }
 
 TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
@@ -211,16 +200,10 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   ASSERT_EQ("", st1.dim_name(1));
   ASSERT_EQ("", st1.dim_name(2));
 
-  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 1, ptr[i]);
-  }
-  for (int i = 0; i < 6; ++i) {
-    ASSERT_EQ(i + 11, ptr[i + 6]);
-  }
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
 
   const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index());
-  ASSERT_EQ(std::string("SparseCSRIndex"), si.ToString());
   ASSERT_EQ(1, si.indptr()->ndim());
   ASSERT_EQ(1, si.indices()->ndim());
 
@@ -241,4 +224,40 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
 }
 
+TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) {
+  std::vector<int64_t> shape = {6, 4};
+  std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
+                                 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
+                                 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
+  std::vector<int64_t> strides = {64, 16};
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, strides);
+  SparseTensorImpl<SparseCSRIndex> st(tensor);
+
+  ASSERT_EQ(12, st.non_zero_length());
+  ASSERT_TRUE(st.is_mutable());
+
+  const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
+  AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
+
+  const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st.sparse_index());
+  ASSERT_EQ(1, si.indptr()->ndim());
+  ASSERT_EQ(1, si.indices()->ndim());
+
+  const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
+  std::vector<int64_t> indptr_values(indptr_begin,
+                                     indptr_begin + si.indptr()->shape()[0]);
+
+  ASSERT_EQ(7, indptr_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
+
+  const int64_t* indices_begin =
+      reinterpret_cast<const int64_t*>(si.indices()->raw_data());
+  std::vector<int64_t> indices_values(indices_begin,
+                                      indices_begin + si.indices()->shape()[0]);
+
+  ASSERT_EQ(12, indices_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/tensor-test.cc b/cpp/src/arrow/tensor-test.cc
index a437e6db5ad..af20aed0d6e 100644
--- a/cpp/src/arrow/tensor-test.cc
+++ b/cpp/src/arrow/tensor-test.cc
@@ -104,13 +104,16 @@ TEST(TestTensor, ZeroDimensionalTensor) {
   ASSERT_EQ(t.strides().size(), 1);
 }
 
-TEST(TestNumericTensor, ElementAccess) {
+TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
   std::vector<int64_t> shape = {3, 4};
 
   std::vector<int64_t> values_i64 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
   std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
   NumericTensor<Int64Type> t_i64(buffer_i64, shape);
 
+  ASSERT_TRUE(t_i64.is_row_major());
+  ASSERT_FALSE(t_i64.is_column_major());
+  ASSERT_TRUE(t_i64.is_contiguous());
   ASSERT_EQ(1, t_i64.Value({0, 0}));
   ASSERT_EQ(5, t_i64.Value({1, 0}));
   ASSERT_EQ(6, t_i64.Value({1, 1}));
@@ -121,22 +124,27 @@
   std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
   NumericTensor<FloatType> t_f32(buffer_f32, shape);
 
+  ASSERT_TRUE(t_f32.is_row_major());
+  ASSERT_FALSE(t_f32.is_column_major());
+  ASSERT_TRUE(t_f32.is_contiguous());
   ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
   ASSERT_EQ(5.1f, t_f32.Value({1, 0}));
   ASSERT_EQ(6.1f, t_f32.Value({1, 1}));
   ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
 }
 
-TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
+TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
   std::vector<int64_t> shape = {3, 4};
 
   const int64_t i64_size = sizeof(int64_t);
-  std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7,
-                                     8, 0, 0, 9, 10, 11, 12, 0, 0};
-  std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};
+  std::vector<int64_t> values_i64 = {1, 5, 9, 2, 6, 10, 3, 7, 11, 4, 8, 12};
+  std::vector<int64_t> strides_i64 = {i64_size, i64_size * 3};
   std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
   NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
 
+  ASSERT_TRUE(t_i64.is_column_major());
+  ASSERT_FALSE(t_i64.is_row_major());
+  ASSERT_TRUE(t_i64.is_contiguous());
   ASSERT_EQ(1, t_i64.Value({0, 0}));
   ASSERT_EQ(2, t_i64.Value({0, 1}));
   ASSERT_EQ(4, t_i64.Value({0, 3}));
@@ -145,13 +153,15 @@ TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
   ASSERT_EQ(11, t_i64.Value({2, 2}));
 
   const int64_t f32_size = sizeof(float);
-  std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f,
-                                   5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f,
-                                   9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
-  std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};
+  std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f, 2.1f, 6.1f, 10.1f,
+                                   3.1f, 7.1f, 11.1f, 4.1f, 8.1f, 12.1f};
+  std::vector<int64_t> strides_f32 = {f32_size, f32_size * 3};
   std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
   NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
 
+  ASSERT_TRUE(t_f32.is_column_major());
+  ASSERT_FALSE(t_f32.is_row_major());
+  ASSERT_TRUE(t_f32.is_contiguous());
   ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
   ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
   ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
@@ -160,15 +170,19 @@ TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
   ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
 }
 
-TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
+TEST(TestNumericTensor, ElementAccessWithNonContiguousStrides) {
  std::vector<int64_t> shape = {3, 4};
 
   const int64_t i64_size = sizeof(int64_t);
-  std::vector<int64_t> values_i64 = {1, 5, 9, 0, 2, 6, 10, 0, 3, 7, 11, 0, 4, 8, 12, 0};
-  std::vector<int64_t> strides_i64 = {i64_size, i64_size * 4};
+  std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7,
+                                     8, 0, 0, 9, 10, 11, 12, 0, 0};
+  std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};
   std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
   NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
 
+  ASSERT_FALSE(t_i64.is_contiguous());
+  ASSERT_FALSE(t_i64.is_row_major());
+  ASSERT_FALSE(t_i64.is_column_major());
   ASSERT_EQ(1, t_i64.Value({0, 0}));
   ASSERT_EQ(2, t_i64.Value({0, 1}));
   ASSERT_EQ(4, t_i64.Value({0, 3}));
@@ -177,12 +191,16 @@ TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
   ASSERT_EQ(11, t_i64.Value({2, 2}));
 
   const int64_t f32_size = sizeof(float);
-  std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f, 0.0f, 2.1f, 6.1f, 10.1f, 0.0f,
-                                   3.1f, 7.1f, 11.1f, 0.0f, 4.1f, 8.1f, 12.1f, 0.0f};
-  std::vector<int64_t> strides_f32 = {f32_size, f32_size * 4};
+  std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f,
+                                   5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f,
+                                   9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
+  std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};
   std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
   NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
 
+  ASSERT_FALSE(t_f32.is_contiguous());
+  ASSERT_FALSE(t_f32.is_row_major());
+  ASSERT_FALSE(t_f32.is_column_major());
   ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
   ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
   ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
diff --git a/cpp/src/arrow/test-util.h b/cpp/src/arrow/test-util.h
index aa7c73e59ac..713ff38ca52 100644
--- a/cpp/src/arrow/test-util.h
+++ b/cpp/src/arrow/test-util.h
@@ -202,6 +202,15 @@ ARROW_EXPORT void PrintColumn(const Column& col, std::stringstream* ss);
 ARROW_EXPORT void AssertTablesEqual(const Table& expected, const Table& actual,
                                     bool same_chunk_layout = true);
 
+template <typename C_TYPE>
+void AssertNumericDataEqual(const C_TYPE* raw_data,
+                            const std::vector<C_TYPE>& expected_values) {
+  for (auto expected : expected_values) {
+    ASSERT_EQ(expected, *raw_data);
+    ++raw_data;
+  }
+}
+
 ARROW_EXPORT void CompareBatch(const RecordBatch& left, const RecordBatch& right);
 
 // Check if the padding of the buffers of the array is zero.