From e6e8dcfabd3c1579f0ca660ff7aa0407d0f9810c Mon Sep 17 00:00:00 2001 From: Rok Date: Mon, 7 Oct 2019 03:19:49 +0200 Subject: [PATCH 1/3] ARROW-6624 [C++] Add SparseTensor.ToTensor() method --- cpp/src/arrow/python/numpy_convert.cc | 4 +- cpp/src/arrow/sparse_tensor.cc | 132 ++++++++++++++++++++++++++ cpp/src/arrow/sparse_tensor.h | 3 + cpp/src/arrow/sparse_tensor_test.cc | 27 ++++++ 4 files changed, 164 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/python/numpy_convert.cc b/cpp/src/arrow/python/numpy_convert.cc index 6c1f3d7c967..792f47d0add 100644 --- a/cpp/src/arrow/python/numpy_convert.cc +++ b/cpp/src/arrow/python/numpy_convert.cc @@ -324,7 +324,7 @@ Status SparseCOOTensorToNdarray(const std::shared_ptr& sparse_t // Wrap tensor data OwnedRef result_data; RETURN_NOT_OK(SparseTensorDataToNdarray( - *sparse_tensor, {sparse_index.non_zero_length(), 1}, base, result_data.ref())); + *sparse_tensor, {sparse_tensor->non_zero_length(), 1}, base, result_data.ref())); // Wrap indices PyObject* result_coords; @@ -344,7 +344,7 @@ Status SparseCSRMatrixToNdarray(const std::shared_ptr& sparse_t // Wrap tensor data OwnedRef result_data; RETURN_NOT_OK(SparseTensorDataToNdarray( - *sparse_tensor, {sparse_index.non_zero_length(), 1}, base, result_data.ref())); + *sparse_tensor, {sparse_tensor->non_zero_length(), 1}, base, result_data.ref())); // Wrap indices OwnedRef result_indptr; diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index b6fe2f3a1e5..a1cee8be42c 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -364,6 +364,132 @@ void MakeSparseTensorFromTensor(const Tensor& tensor, } } +template +void MakeTensorFromSparseTensor(const SparseTensor* sparse_tensor, + std::shared_ptr* tensor) { + using c_index_value_type = typename IndexValueType::c_type; + using NumericTensorType = NumericTensor; + using value_type = typename NumericTensorType::value_type; + + std::shared_ptr values_buffer; + ARROW_CHECK_OK( + AllocateBuffer(sizeof(value_type) * sparse_tensor->size(), &values_buffer)); + auto values = reinterpret_cast(values_buffer->mutable_data()); + + for (int64_t i = 0; i < sparse_tensor->size(); ++i) { + values[i] = 0; + } + + switch (sparse_tensor->format_id()) { + case SparseTensorFormat::COO: { + const SparseCOOIndex sparse_index = + internal::checked_cast(*sparse_tensor->sparse_index()); + const std::shared_ptr coords = sparse_index.indices(); + const auto raw_data = + reinterpret_cast(sparse_tensor->raw_data()); + std::vector strides(sparse_tensor->ndim(), 1); + + for (int i = sparse_tensor->ndim() - 1; i > 0; --i) { + strides[i - 1] *= strides[i] * sparse_tensor->shape()[i]; + } + for (int64_t i = 0; i < sparse_tensor->non_zero_length(); ++i) { + std::vector coord(sparse_tensor->ndim()); + int64_t offset = 0; + for (int64_t j = 0; j < static_cast(coord.size()); ++j) { + coord[j] = coords->Value({i, j}); + offset += coord[j] * strides[j]; + } + values[offset] = raw_data[i]; + } + *tensor = std::make_shared(sparse_tensor->type(), values_buffer, + sparse_tensor->shape()); + break; + } + + case SparseTensorFormat::CSR: { + const SparseCSRIndex sparse_index = + internal::checked_cast(*sparse_tensor->sparse_index()); + const std::shared_ptr indptr = sparse_index.indptr(); + const std::shared_ptr indices = sparse_index.indices(); + const auto raw_data = + reinterpret_cast(sparse_tensor->raw_data()); + + int64_t offset; + for (int64_t i = 0; i < indptr->size() - 1; ++i) { + const int64_t start = indptr->Value({i}); + const int64_t stop = indptr->Value({i + 1}); + for (int64_t j = start; j < stop; ++j) { + offset = indices->Value({j}) + i * sparse_tensor->shape()[1]; + values[offset] = raw_data[j]; + } + } + *tensor = std::make_shared(sparse_tensor->type(), values_buffer, + sparse_tensor->shape()); + break; + } + } +} + +#define MAKE_TENSOR_FROM_SPARSE_TENSOR_INDEX_TYPE(IndexValueType) \ + case IndexValueType##Type::type_id: \ + MakeTensorFromSparseTensor(sparse_tensor, tensor); \ + break; + +template +void MakeTensorFromSparseTensor(const SparseTensor* sparse_tensor, + std::shared_ptr* tensor) { + std::shared_ptr type; + switch (sparse_tensor->format_id()) { + case SparseTensorFormat::COO: { + const SparseCOOIndex sparse_index = + internal::checked_cast(*sparse_tensor->sparse_index()); + const std::shared_ptr indices = sparse_index.indices(); + type = indices->type(); + break; + } + case SparseTensorFormat::CSR: { + const SparseCSRIndex sparse_index = + internal::checked_cast(*sparse_tensor->sparse_index()); + const std::shared_ptr indices = sparse_index.indices(); + type = indices->type(); + break; + } + // LCOV_EXCL_START: ignore program failure + default: + ARROW_LOG(FATAL) << "Unsupported SparseIndex value type"; + break; + // LCOV_EXCL_STOP + } + + switch (type->id()) { + ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(MAKE_TENSOR_FROM_SPARSE_TENSOR_INDEX_TYPE); + // LCOV_EXCL_START: ignore program failure + default: + ARROW_LOG(FATAL) << "Unsupported SparseIndex value type"; + break; + // LCOV_EXCL_STOP + } +} +#undef MAKE_TENSOR_FROM_SPARSE_TENSOR_INDEX_TYPE + +#define MAKE_TENSOR_FROM_SPARSE_TENSOR_VALUE_TYPE(TYPE) \ + case TYPE##Type::type_id: \ + MakeTensorFromSparseTensor(sparse_tensor, tensor); \ + break; + +void MakeTensorFromSparseTensor(const SparseTensor* sparse_tensor, + std::shared_ptr* tensor) { + switch (sparse_tensor->type()->id()) { + ARROW_GENERATE_FOR_ALL_NUMERIC_TYPES(MAKE_TENSOR_FROM_SPARSE_TENSOR_VALUE_TYPE); + // LCOV_EXCL_START: ignore program failure + default: + ARROW_LOG(FATAL) << "Unsupported SparseTensor value type"; + break; + // LCOV_EXCL_STOP + } +} +#undef MAKE_TENSOR_FROM_SPARSE_TENSOR_VALUE_TYPE + } // namespace internal // ---------------------------------------------------------------------- @@ -429,4 +555,10 @@ bool SparseTensor::Equals(const SparseTensor& other) const { return SparseTensorEquals(*this, other); } +std::shared_ptr SparseTensor::ToTensor() const { + std::shared_ptr tensor; + internal::MakeTensorFromSparseTensor(this, &tensor); + return tensor; +} + } // namespace arrow diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index 47df0115028..e35e18cb215 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -204,6 +204,9 @@ class ARROW_EXPORT SparseTensor { /// \brief Return whether sparse tensors are equal bool Equals(const SparseTensor& other) const; + /// \brief Return dense representation of sparse tensor as tensor + std::shared_ptr ToTensor() const; + protected: // Constructor with all attributes SparseTensor(const std::shared_ptr& type, const std::shared_ptr& data, diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index e37f3e46ec9..1348a8bc213 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -202,6 +202,20 @@ TEST_F(TestSparseCOOTensor, TensorEquality) { ASSERT_FALSE(st1.Equals(st2)); } +TEST_F(TestSparseCOOTensor, TestToTensor) { + std::vector values = {1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4}; + std::vector shape({4, 3, 2}); + std::shared_ptr buffer = Buffer::Wrap(values); + Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); + SparseTensorImpl sparse_tensor(tensor); + + ASSERT_EQ(5, sparse_tensor.non_zero_length()); + ASSERT_TRUE(sparse_tensor.is_mutable()); + std::shared_ptr dense_tensor = sparse_tensor.ToTensor(); + ASSERT_TRUE(tensor.Equals(*dense_tensor)); +} + template class TestSparseCOOTensorForIndexValueType : public TestSparseCOOTensorBase { @@ -469,4 +483,17 @@ TEST_F(TestSparseCSRMatrix, TensorEquality) { ASSERT_FALSE(st1.Equals(st2)); } +TEST_F(TestSparseCSRMatrix, TestToTensor) { + std::vector values = {1, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 1, + 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1}; + std::vector shape({6, 4}); + std::shared_ptr buffer = Buffer::Wrap(values); + Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); + SparseTensorImpl sparse_tensor(tensor); + + ASSERT_EQ(7, sparse_tensor.non_zero_length()); + ASSERT_TRUE(sparse_tensor.is_mutable()); + std::shared_ptr dense_tensor = sparse_tensor.ToTensor(); + ASSERT_TRUE(tensor.Equals(*dense_tensor)); +} } // namespace arrow From 89f3eca016f884071dcd89b38bbcf3003be9c678 Mon Sep 17 00:00:00 2001 From: Rok Date: Sat, 12 Oct 2019 16:42:49 +0200 Subject: [PATCH 2/3] Adding python interface. --- python/pyarrow/includes/libarrow.pxd | 2 ++ python/pyarrow/tensor.pxi | 20 ++++++++++++++++++++ python/pyarrow/tests/test_sparse_tensor.py | 21 +++++++++++++++++++++ 3 files changed, 43 insertions(+) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index fd130f83474..c403b9677da 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -663,6 +663,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CSparseCOOTensor" arrow::SparseCOOTensor": shared_ptr[CDataType] type() shared_ptr[CBuffer] data() + const shared_ptr[CTensor] ToTensor() const vector[int64_t]& shape() int64_t size() @@ -679,6 +680,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CSparseCSRMatrix" arrow::SparseCSRMatrix": shared_ptr[CDataType] type() shared_ptr[CBuffer] data() + const shared_ptr[CTensor] ToTensor() const vector[int64_t]& shape() int64_t size() diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi index fb2c3c0f852..fa6b357916e 100644 --- a/python/pyarrow/tensor.pxi +++ b/python/pyarrow/tensor.pxi @@ -202,6 +202,16 @@ shape: {0.shape}""".format(self) &out_data, &out_coords)) return PyObject_to_object(out_data), PyObject_to_object(out_coords) + def to_tensor(self): + """ + Convert arrow::SparseTensorCOO to arrow::Tensor + """ + + cdef shared_ptr[CTensor] ctensor + ctensor = self.stp.ToTensor() + + return pyarrow_wrap_tensor(ctensor) + def equals(self, SparseCOOTensor other): """ Return true if sparse tensors contains exactly equal data @@ -326,6 +336,16 @@ shape: {0.shape}""".format(self) return (PyObject_to_object(out_data), PyObject_to_object(out_indptr), PyObject_to_object(out_indices)) + def to_tensor(self): + """ + Convert arrow::SparseTensorCSR to arrow::Tensor + """ + + cdef shared_ptr[CTensor] ctensor + ctensor = self.stp.ToTensor() + + return pyarrow_wrap_tensor(ctensor) + def equals(self, SparseCSRMatrix other): """ Return true if sparse tensors contains exactly equal data diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py index 225bbbf56dc..c77c7d7a4ad 100644 --- a/python/pyarrow/tests/test_sparse_tensor.py +++ b/python/pyarrow/tests/test_sparse_tensor.py @@ -219,3 +219,24 @@ def test_sparse_tensor_csr_numpy_roundtrip(dtype_str, arrow_type): assert np.array_equal(indptr, result_indptr) assert np.array_equal(indices, result_indices) assert sparse_tensor.dim_names == dim_names + + +@pytest.mark.parametrize('sparse_tensor_type', [ + pa.SparseTensorCSR, + pa.SparseTensorCOO, +]) +@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) +def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type): + dtype = np.dtype(dtype_str) + array = np.array([[4, 0, 9, 0], + [0, 7, 0, 0], + [0, 0, 0, 0], + [0, 0, 0, 5]]).astype(dtype) + + sparse_tensor = sparse_tensor_type.from_dense_numpy(array) + tensor = sparse_tensor.to_tensor() + result_array = tensor.to_numpy() + + assert sparse_tensor.type == arrow_type + assert tensor.type == arrow_type + assert np.array_equal(array, result_array) From 0eb11c692d8c656175f145dc1bdefcdeebcc6bc0 Mon Sep 17 00:00:00 2001 From: Rok Date: Thu, 17 Oct 2019 14:41:07 +0200 Subject: [PATCH 3/3] Implementing review feedback. --- cpp/src/arrow/sparse_tensor.cc | 67 +++++++++++----------- cpp/src/arrow/sparse_tensor.h | 8 ++- cpp/src/arrow/sparse_tensor_test.cc | 8 ++- python/pyarrow/includes/libarrow.pxd | 4 +- python/pyarrow/tensor.pxi | 18 +++--- python/pyarrow/tests/test_sparse_tensor.py | 4 +- 6 files changed, 58 insertions(+), 51 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index a1cee8be42c..3fd7008cb7d 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -365,24 +365,22 @@ void MakeSparseTensorFromTensor(const Tensor& tensor, } template -void MakeTensorFromSparseTensor(const SparseTensor* sparse_tensor, - std::shared_ptr* tensor) { +Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_tensor, + std::shared_ptr* out) { using c_index_value_type = typename IndexValueType::c_type; using NumericTensorType = NumericTensor; using value_type = typename NumericTensorType::value_type; std::shared_ptr values_buffer; - ARROW_CHECK_OK( - AllocateBuffer(sizeof(value_type) * sparse_tensor->size(), &values_buffer)); + RETURN_NOT_OK( + AllocateBuffer(pool, sizeof(value_type) * sparse_tensor->size(), &values_buffer)); auto values = reinterpret_cast(values_buffer->mutable_data()); - for (int64_t i = 0; i < sparse_tensor->size(); ++i) { - values[i] = 0; - } + std::fill_n(values, sparse_tensor->size(), static_cast(0)); switch (sparse_tensor->format_id()) { case SparseTensorFormat::COO: { - const SparseCOOIndex sparse_index = + const auto& sparse_index = internal::checked_cast(*sparse_tensor->sparse_index()); const std::shared_ptr coords = sparse_index.indices(); const auto raw_data = @@ -401,13 +399,13 @@ void MakeTensorFromSparseTensor(const SparseTensor* sparse_tensor, } values[offset] = raw_data[i]; } - *tensor = std::make_shared(sparse_tensor->type(), values_buffer, - sparse_tensor->shape()); - break; + *out = std::make_shared(sparse_tensor->type(), values_buffer, + sparse_tensor->shape()); + return Status::OK(); } case SparseTensorFormat::CSR: { - const SparseCSRIndex sparse_index = + const auto& sparse_index = internal::checked_cast(*sparse_tensor->sparse_index()); const std::shared_ptr indptr = sparse_index.indptr(); const std::shared_ptr indices = sparse_index.indices(); @@ -423,32 +421,34 @@ void MakeTensorFromSparseTensor(const SparseTensor* sparse_tensor, values[offset] = raw_data[j]; } } - *tensor = std::make_shared(sparse_tensor->type(), values_buffer, - sparse_tensor->shape()); - break; + *out = std::make_shared(sparse_tensor->type(), values_buffer, + sparse_tensor->shape()); + return Status::OK(); } } + return Status::NotImplemented("Unsupported SparseIndex format type"); } -#define MAKE_TENSOR_FROM_SPARSE_TENSOR_INDEX_TYPE(IndexValueType) \ - case IndexValueType##Type::type_id: \ - MakeTensorFromSparseTensor(sparse_tensor, tensor); \ +#define MAKE_TENSOR_FROM_SPARSE_TENSOR_INDEX_TYPE(IndexValueType) \ + case IndexValueType##Type::type_id: \ + return MakeTensorFromSparseTensor(pool, sparse_tensor, \ + out); \ break; template -void MakeTensorFromSparseTensor(const SparseTensor* sparse_tensor, - std::shared_ptr* tensor) { +Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_tensor, + std::shared_ptr* out) { std::shared_ptr type; switch (sparse_tensor->format_id()) { case SparseTensorFormat::COO: { - const SparseCOOIndex sparse_index = + const auto& sparse_index = internal::checked_cast(*sparse_tensor->sparse_index()); const std::shared_ptr indices = sparse_index.indices(); type = indices->type(); break; } case SparseTensorFormat::CSR: { - const SparseCSRIndex sparse_index = + const auto& sparse_index = internal::checked_cast(*sparse_tensor->sparse_index()); const std::shared_ptr indices = sparse_index.indices(); type = indices->type(); @@ -456,7 +456,7 @@ void MakeTensorFromSparseTensor(const SparseTensor* sparse_tensor, } // LCOV_EXCL_START: ignore program failure default: - ARROW_LOG(FATAL) << "Unsupported SparseIndex value type"; + ARROW_LOG(FATAL) << "Unsupported SparseIndex format"; break; // LCOV_EXCL_STOP } @@ -466,25 +466,24 @@ void MakeTensorFromSparseTensor(const SparseTensor* sparse_tensor, // LCOV_EXCL_START: ignore program failure default: ARROW_LOG(FATAL) << "Unsupported SparseIndex value type"; - break; + return Status::NotImplemented("Unsupported SparseIndex value type"); // LCOV_EXCL_STOP } } #undef MAKE_TENSOR_FROM_SPARSE_TENSOR_INDEX_TYPE -#define MAKE_TENSOR_FROM_SPARSE_TENSOR_VALUE_TYPE(TYPE) \ - case TYPE##Type::type_id: \ - MakeTensorFromSparseTensor(sparse_tensor, tensor); \ - break; +#define MAKE_TENSOR_FROM_SPARSE_TENSOR_VALUE_TYPE(TYPE) \ + case TYPE##Type::type_id: \ + return MakeTensorFromSparseTensor(pool, sparse_tensor, out); -void MakeTensorFromSparseTensor(const SparseTensor* sparse_tensor, - std::shared_ptr* tensor) { +Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_tensor, + std::shared_ptr* out) { switch (sparse_tensor->type()->id()) { ARROW_GENERATE_FOR_ALL_NUMERIC_TYPES(MAKE_TENSOR_FROM_SPARSE_TENSOR_VALUE_TYPE); // LCOV_EXCL_START: ignore program failure default: ARROW_LOG(FATAL) << "Unsupported SparseTensor value type"; - break; + return Status::NotImplemented("Unsupported SparseTensor data value type"); // LCOV_EXCL_STOP } } @@ -555,10 +554,8 @@ bool SparseTensor::Equals(const SparseTensor& other) const { return SparseTensorEquals(*this, other); } -std::shared_ptr SparseTensor::ToTensor() const { - std::shared_ptr tensor; - internal::MakeTensorFromSparseTensor(this, &tensor); - return tensor; +Status SparseTensor::ToTensor(MemoryPool* pool, std::shared_ptr* out) const { + return internal::MakeTensorFromSparseTensor(pool, this, out); } } // namespace arrow diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index e35e18cb215..d24a680df5b 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -205,7 +205,13 @@ class ARROW_EXPORT SparseTensor { bool Equals(const SparseTensor& other) const; /// \brief Return dense representation of sparse tensor as tensor - std::shared_ptr ToTensor() const; + Status ToTensor(std::shared_ptr* out) const { + return ToTensor(default_memory_pool(), out); + } + + /// \brief Return dense representation of sparse tensor as tensor + /// using specified memory pool + Status ToTensor(MemoryPool* pool, std::shared_ptr* out) const; protected: // Constructor with all attributes diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 1348a8bc213..5fcae47fe40 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -205,14 +205,15 @@ TEST_F(TestSparseCOOTensor, TensorEquality) { TEST_F(TestSparseCOOTensor, TestToTensor) { std::vector values = {1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4}; - std::vector shape({4, 3, 2}); + std::vector shape({4, 3, 2, 1}); std::shared_ptr buffer = Buffer::Wrap(values); Tensor tensor(int64(), buffer, shape, {}, this->dim_names_); SparseTensorImpl sparse_tensor(tensor); ASSERT_EQ(5, sparse_tensor.non_zero_length()); ASSERT_TRUE(sparse_tensor.is_mutable()); - std::shared_ptr dense_tensor = sparse_tensor.ToTensor(); + std::shared_ptr dense_tensor; + ASSERT_OK(sparse_tensor.ToTensor(&dense_tensor)); ASSERT_TRUE(tensor.Equals(*dense_tensor)); } @@ -493,7 +494,8 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) { ASSERT_EQ(7, sparse_tensor.non_zero_length()); ASSERT_TRUE(sparse_tensor.is_mutable()); - std::shared_ptr dense_tensor = sparse_tensor.ToTensor(); + std::shared_ptr dense_tensor; + ASSERT_OK(sparse_tensor.ToTensor(&dense_tensor)); ASSERT_TRUE(tensor.Equals(*dense_tensor)); } } // namespace arrow diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index c403b9677da..dc29c10aed9 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -663,7 +663,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CSparseCOOTensor" arrow::SparseCOOTensor": shared_ptr[CDataType] type() shared_ptr[CBuffer] data() - const shared_ptr[CTensor] ToTensor() + CStatus ToTensor(shared_ptr[CTensor]*) const vector[int64_t]& shape() int64_t size() @@ -680,7 +680,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CSparseCSRMatrix" arrow::SparseCSRMatrix": shared_ptr[CDataType] type() shared_ptr[CBuffer] data() - const shared_ptr[CTensor] ToTensor() + CStatus ToTensor(shared_ptr[CTensor]*) const vector[int64_t]& shape() int64_t size() diff --git a/python/pyarrow/tensor.pxi b/python/pyarrow/tensor.pxi index fa6b357916e..4b93676ee07 100644 --- a/python/pyarrow/tensor.pxi +++ b/python/pyarrow/tensor.pxi @@ -175,7 +175,8 @@ shape: {0.shape}""".format(self) "SparseCOOTensor indices") check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(), - data, coords, c_shape, c_dim_names, &csparse_tensor)) + data, coords, c_shape, + c_dim_names, &csparse_tensor)) return pyarrow_wrap_sparse_coo_tensor(csparse_tensor) @staticmethod @@ -204,11 +205,11 @@ shape: {0.shape}""".format(self) def to_tensor(self): """ - Convert arrow::SparseTensorCOO to arrow::Tensor + Convert arrow::SparseCOOTensor to arrow::Tensor """ cdef shared_ptr[CTensor] ctensor - ctensor = self.stp.ToTensor() + check_status(self.stp.ToTensor(&ctensor)) return pyarrow_wrap_tensor(ctensor) @@ -306,8 +307,8 @@ shape: {0.shape}""".format(self) "SparseCSRMatrix indices") check_status(NdarraysToSparseCSRMatrix(c_default_memory_pool(), - data, indptr, indices, c_shape, c_dim_names, - &csparse_tensor)) + data, indptr, indices, c_shape, + c_dim_names, &csparse_tensor)) return pyarrow_wrap_sparse_csr_matrix(csparse_tensor) @staticmethod @@ -332,17 +333,18 @@ shape: {0.shape}""".format(self) cdef PyObject* out_indices check_status(SparseCSRMatrixToNdarray(self.sp_sparse_tensor, self, - &out_data, &out_indptr, &out_indices)) + &out_data, &out_indptr, + &out_indices)) return (PyObject_to_object(out_data), PyObject_to_object(out_indptr), PyObject_to_object(out_indices)) def to_tensor(self): """ - Convert arrow::SparseTensorCSR to arrow::Tensor + Convert arrow::SparseCSRMatrix to arrow::Tensor """ cdef shared_ptr[CTensor] ctensor - ctensor = self.stp.ToTensor() + check_status(self.stp.ToTensor(&ctensor)) return pyarrow_wrap_tensor(ctensor) diff --git a/python/pyarrow/tests/test_sparse_tensor.py b/python/pyarrow/tests/test_sparse_tensor.py index c77c7d7a4ad..aaf0468f982 100644 --- a/python/pyarrow/tests/test_sparse_tensor.py +++ b/python/pyarrow/tests/test_sparse_tensor.py @@ -222,8 +222,8 @@ def test_sparse_tensor_csr_numpy_roundtrip(dtype_str, arrow_type): @pytest.mark.parametrize('sparse_tensor_type', [ - pa.SparseTensorCSR, - pa.SparseTensorCOO, + pa.SparseCSRMatrix, + pa.SparseCOOTensor, ]) @pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs) def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type):