Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/src/arrow/python/numpy_convert.cc
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ Status SparseCOOTensorToNdarray(const std::shared_ptr<SparseCOOTensor>& sparse_t
// Wrap tensor data
OwnedRef result_data;
RETURN_NOT_OK(SparseTensorDataToNdarray(
*sparse_tensor, {sparse_index.non_zero_length(), 1}, base, result_data.ref()));
*sparse_tensor, {sparse_tensor->non_zero_length(), 1}, base, result_data.ref()));

// Wrap indices
PyObject* result_coords;
Expand All @@ -344,7 +344,7 @@ Status SparseCSRMatrixToNdarray(const std::shared_ptr<SparseCSRMatrix>& sparse_t
// Wrap tensor data
OwnedRef result_data;
RETURN_NOT_OK(SparseTensorDataToNdarray(
*sparse_tensor, {sparse_index.non_zero_length(), 1}, base, result_data.ref()));
*sparse_tensor, {sparse_tensor->non_zero_length(), 1}, base, result_data.ref()));

// Wrap indices
OwnedRef result_indptr;
Expand Down
129 changes: 129 additions & 0 deletions cpp/src/arrow/sparse_tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,131 @@ void MakeSparseTensorFromTensor(const Tensor& tensor,
}
}

/// \brief Build a dense tensor from a sparse tensor for one concrete
/// (value type, index value type) combination.
///
/// A buffer of `sparse_tensor->size()` values is allocated from `pool`,
/// zero-filled, and then every stored non-zero value is written at the
/// row-major position described by the sparse index.
///
/// \tparam TYPE Arrow type of the tensor values
/// \tparam IndexValueType Arrow integer type of the sparse index values
/// \param[in] pool memory pool the dense values buffer is allocated from
/// \param[in] sparse_tensor sparse tensor to convert (COO or CSR format)
/// \param[out] out receives the dense tensor on success
/// \return Status::OK() on success, the error from AllocateBuffer if the
/// allocation fails, or NotImplemented for a format not handled below
template <typename TYPE, typename IndexValueType>
Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_tensor,
                                  std::shared_ptr<Tensor>* out) {
  using NumericTensorType = NumericTensor<TYPE>;
  using value_type = typename NumericTensorType::value_type;

  std::shared_ptr<Buffer> values_buffer;
  RETURN_NOT_OK(
      AllocateBuffer(pool, sizeof(value_type) * sparse_tensor->size(), &values_buffer));
  auto values = reinterpret_cast<value_type*>(values_buffer->mutable_data());

  // Positions that the sparse index does not mention are implicit zeros.
  std::fill_n(values, sparse_tensor->size(), static_cast<value_type>(0));

  switch (sparse_tensor->format_id()) {
    case SparseTensorFormat::COO: {
      const auto& sparse_index =
          internal::checked_cast<const SparseCOOIndex&>(*sparse_tensor->sparse_index());
      const std::shared_ptr<const Tensor> coords = sparse_index.indices();
      const auto raw_data =
          reinterpret_cast<const value_type*>(sparse_tensor->raw_data());
      const auto& shape = sparse_tensor->shape();
      const int ndim = static_cast<int>(sparse_tensor->ndim());
      const int64_t non_zero_length = sparse_tensor->non_zero_length();

      // Row-major strides, counted in elements (not bytes): the last
      // dimension is contiguous.
      std::vector<int64_t> strides(ndim, 1);
      for (int i = ndim - 1; i > 0; --i) {
        strides[i - 1] = strides[i] * shape[i];
      }

      // Row i of `coords` is the coordinate of the i-th stored value; fold it
      // directly into a flat offset instead of materializing it in a
      // temporary vector for every element.
      for (int64_t i = 0; i < non_zero_length; ++i) {
        int64_t offset = 0;
        for (int64_t j = 0; j < ndim; ++j) {
          offset +=
              static_cast<int64_t>(coords->Value<IndexValueType>({i, j})) * strides[j];
        }
        values[offset] = raw_data[i];
      }
      // NOTE(review): the dense tensor is built from type, buffer and shape
      // only; dimension names are not forwarded -- confirm this is intended.
      *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
                                      sparse_tensor->shape());
      return Status::OK();
    }

    case SparseTensorFormat::CSR: {
      const auto& sparse_index =
          internal::checked_cast<const SparseCSRIndex&>(*sparse_tensor->sparse_index());
      const std::shared_ptr<const Tensor> indptr = sparse_index.indptr();
      const std::shared_ptr<const Tensor> indices = sparse_index.indices();
      const auto raw_data =
          reinterpret_cast<const value_type*>(sparse_tensor->raw_data());
      const auto& shape = sparse_tensor->shape();
      const int64_t num_rows = indptr->size() - 1;

      // indptr[i] .. indptr[i + 1] delimits the stored entries of row i;
      // indices[j] is the column of the j-th stored entry.
      for (int64_t i = 0; i < num_rows; ++i) {
        const int64_t start = indptr->Value<IndexValueType>({i});
        const int64_t stop = indptr->Value<IndexValueType>({i + 1});
        for (int64_t j = start; j < stop; ++j) {
          const int64_t offset = indices->Value<IndexValueType>({j}) + i * shape[1];
          values[offset] = raw_data[j];
        }
      }
      *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
                                      sparse_tensor->shape());
      return Status::OK();
    }
  }
  return Status::NotImplemented("Unsupported SparseIndex format type");
}

// Expands to one `case` of the index-type switch below, forwarding to the
// (value type, index value type) implementation. Each case returns, so no
// `break` is needed after it.
#define MAKE_TENSOR_FROM_SPARSE_TENSOR_INDEX_TYPE(IndexValueType)                      \
  case IndexValueType##Type::type_id:                                                  \
    return MakeTensorFromSparseTensor<TYPE, IndexValueType##Type>(pool, sparse_tensor, \
                                                                  out);

/// \brief Build a dense tensor from a sparse tensor whose value type is TYPE,
/// dispatching on the integer type of the sparse index.
///
/// \tparam TYPE Arrow type of the tensor values
/// \param[in] pool memory pool forwarded to the typed implementation
/// \param[in] sparse_tensor sparse tensor to convert (COO or CSR format)
/// \param[out] out receives the dense tensor on success
/// \return the Status of the typed implementation, or NotImplemented when the
/// sparse format or the index value type is not handled
template <typename TYPE>
Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_tensor,
                                  std::shared_ptr<Tensor>* out) {
  // Both supported formats expose an `indices` tensor; its type determines
  // which index value type the implementation is instantiated with.
  std::shared_ptr<DataType> type;
  switch (sparse_tensor->format_id()) {
    case SparseTensorFormat::COO: {
      const auto& sparse_index =
          internal::checked_cast<const SparseCOOIndex&>(*sparse_tensor->sparse_index());
      const std::shared_ptr<const Tensor> indices = sparse_index.indices();
      type = indices->type();
      break;
    }
    case SparseTensorFormat::CSR: {
      const auto& sparse_index =
          internal::checked_cast<const SparseCSRIndex&>(*sparse_tensor->sparse_index());
      const std::shared_ptr<const Tensor> indices = sparse_index.indices();
      type = indices->type();
      break;
    }
    // LCOV_EXCL_START: ignore program failure
    default:
      ARROW_LOG(FATAL) << "Unsupported SparseIndex format";
      // Return instead of falling through: `type` is still null here and the
      // switch below would dereference it.
      return Status::NotImplemented("Unsupported SparseIndex format");
      // LCOV_EXCL_STOP
  }

  switch (type->id()) {
    ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(MAKE_TENSOR_FROM_SPARSE_TENSOR_INDEX_TYPE);
    // LCOV_EXCL_START: ignore program failure
    default:
      ARROW_LOG(FATAL) << "Unsupported SparseIndex value type";
      return Status::NotImplemented("Unsupported SparseIndex value type");
      // LCOV_EXCL_STOP
  }
}
#undef MAKE_TENSOR_FROM_SPARSE_TENSOR_INDEX_TYPE

// One `case` per numeric value type: jump to the implementation templated on
// that value type.
#define DISPATCH_SPARSE_TO_DENSE_ON_VALUE_TYPE(ValueType) \
  case ValueType##Type::type_id:                          \
    return MakeTensorFromSparseTensor<ValueType##Type>(pool, sparse_tensor, out);

/// \brief Convert a sparse tensor to a dense tensor, selecting the typed
/// implementation from the sparse tensor's value type.
///
/// \param[in] pool memory pool forwarded to the typed implementation
/// \param[in] sparse_tensor sparse tensor to convert
/// \param[out] out receives the dense tensor on success
/// \return the Status of the typed implementation, or NotImplemented when the
/// value type is not one of the generated numeric cases
Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_tensor,
                                  std::shared_ptr<Tensor>* out) {
  const auto value_type_id = sparse_tensor->type()->id();
  switch (value_type_id) {
    ARROW_GENERATE_FOR_ALL_NUMERIC_TYPES(DISPATCH_SPARSE_TO_DENSE_ON_VALUE_TYPE);
    // LCOV_EXCL_START: ignore program failure
    default: {
      ARROW_LOG(FATAL) << "Unsupported SparseTensor value type";
      return Status::NotImplemented("Unsupported SparseTensor data value type");
    }
      // LCOV_EXCL_STOP
  }
}
#undef DISPATCH_SPARSE_TO_DENSE_ON_VALUE_TYPE

} // namespace internal

// ----------------------------------------------------------------------
Expand Down Expand Up @@ -429,4 +554,8 @@ bool SparseTensor::Equals(const SparseTensor& other) const {
return SparseTensorEquals(*this, other);
}

// Dense conversion entry point: hands this sparse tensor to the internal
// converter, which allocates the result from `pool` and stores it in `out`.
Status SparseTensor::ToTensor(MemoryPool* pool, std::shared_ptr<Tensor>* out) const {
  const SparseTensor* self = this;
  return internal::MakeTensorFromSparseTensor(pool, self, out);
}

} // namespace arrow
9 changes: 9 additions & 0 deletions cpp/src/arrow/sparse_tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,15 @@ class ARROW_EXPORT SparseTensor {
/// \brief Return whether sparse tensors are equal
bool Equals(const SparseTensor& other) const;

/// \brief Convert this sparse tensor to a dense tensor, allocating from the
/// default memory pool
///
/// \param[out] out receives the resulting dense tensor
/// \return Status of the pool-taking overload
Status ToTensor(std::shared_ptr<Tensor>* out) const {
  MemoryPool* pool = default_memory_pool();
  return ToTensor(pool, out);
}

/// \brief Return dense representation of sparse tensor as tensor
/// using specified memory pool
///
/// \param[in] pool memory pool used to allocate the dense tensor's buffer
/// \param[out] out receives the resulting dense tensor
/// \return Status
Status ToTensor(MemoryPool* pool, std::shared_ptr<Tensor>* out) const;

protected:
// Constructor with all attributes
SparseTensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
Expand Down
29 changes: 29 additions & 0 deletions cpp/src/arrow/sparse_tensor_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,21 @@ TEST_F(TestSparseCOOTensor, TensorEquality) {
ASSERT_FALSE(st1.Equals(st2));
}

// Dense -> COO -> dense must reproduce the original tensor.
TEST_F(TestSparseCOOTensor, TestToTensor) {
  // 24 values laid out as a 4x3x2x1 tensor, five of them non-zero.
  std::vector<int64_t> dense_values = {1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
                                       0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4};
  std::vector<int64_t> dense_shape({4, 3, 2, 1});
  std::shared_ptr<Buffer> dense_buffer = Buffer::Wrap(dense_values);
  Tensor expected(int64(), dense_buffer, dense_shape, {}, this->dim_names_);
  SparseTensorImpl<SparseCOOIndex> sparse(expected);

  ASSERT_EQ(5, sparse.non_zero_length());
  ASSERT_TRUE(sparse.is_mutable());

  std::shared_ptr<Tensor> round_tripped;
  ASSERT_OK(sparse.ToTensor(&round_tripped));
  ASSERT_TRUE(expected.Equals(*round_tripped));
}

template <typename IndexValueType>
class TestSparseCOOTensorForIndexValueType
: public TestSparseCOOTensorBase<IndexValueType> {
Expand Down Expand Up @@ -469,4 +484,18 @@ TEST_F(TestSparseCSRMatrix, TensorEquality) {
ASSERT_FALSE(st1.Equals(st2));
}

// Dense -> CSR -> dense must reproduce the original matrix.
TEST_F(TestSparseCSRMatrix, TestToTensor) {
  // 24 values laid out as a 6x4 matrix, seven of them non-zero.
  std::vector<int64_t> dense_values = {1, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 1,
                                       0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 1};
  std::vector<int64_t> dense_shape({6, 4});
  std::shared_ptr<Buffer> dense_buffer = Buffer::Wrap(dense_values);
  Tensor expected(int64(), dense_buffer, dense_shape, {}, this->dim_names_);
  SparseTensorImpl<SparseCSRIndex> sparse(expected);

  ASSERT_EQ(7, sparse.non_zero_length());
  ASSERT_TRUE(sparse.is_mutable());

  std::shared_ptr<Tensor> round_tripped;
  ASSERT_OK(sparse.ToTensor(&round_tripped));
  ASSERT_TRUE(expected.Equals(*round_tripped));
}
} // namespace arrow
2 changes: 2 additions & 0 deletions python/pyarrow/includes/libarrow.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CSparseCOOTensor" arrow::SparseCOOTensor":
shared_ptr[CDataType] type()
shared_ptr[CBuffer] data()
CStatus ToTensor(shared_ptr[CTensor]*)

const vector[int64_t]& shape()
int64_t size()
Expand All @@ -679,6 +680,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CSparseCSRMatrix" arrow::SparseCSRMatrix":
shared_ptr[CDataType] type()
shared_ptr[CBuffer] data()
CStatus ToTensor(shared_ptr[CTensor]*)

const vector[int64_t]& shape()
int64_t size()
Expand Down
30 changes: 26 additions & 4 deletions python/pyarrow/tensor.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ shape: {0.shape}""".format(self)
"SparseCOOTensor indices")

check_status(NdarraysToSparseCOOTensor(c_default_memory_pool(),
data, coords, c_shape, c_dim_names, &csparse_tensor))
data, coords, c_shape,
c_dim_names, &csparse_tensor))
return pyarrow_wrap_sparse_coo_tensor(csparse_tensor)

@staticmethod
Expand All @@ -202,6 +203,16 @@ shape: {0.shape}""".format(self)
&out_data, &out_coords))
return PyObject_to_object(out_data), PyObject_to_object(out_coords)

def to_tensor(self):
    """
    Convert this arrow::SparseCOOTensor to a dense arrow::Tensor.

    The C++ status returned by the conversion is passed to check_status
    before the result is wrapped.
    """
    cdef shared_ptr[CTensor] c_dense

    check_status(self.stp.ToTensor(&c_dense))
    return pyarrow_wrap_tensor(c_dense)

def equals(self, SparseCOOTensor other):
"""
Return true if sparse tensors contains exactly equal data
Expand Down Expand Up @@ -296,8 +307,8 @@ shape: {0.shape}""".format(self)
"SparseCSRMatrix indices")

check_status(NdarraysToSparseCSRMatrix(c_default_memory_pool(),
data, indptr, indices, c_shape, c_dim_names,
&csparse_tensor))
data, indptr, indices, c_shape,
c_dim_names, &csparse_tensor))
return pyarrow_wrap_sparse_csr_matrix(csparse_tensor)

@staticmethod
Expand All @@ -322,10 +333,21 @@ shape: {0.shape}""".format(self)
cdef PyObject* out_indices

check_status(SparseCSRMatrixToNdarray(self.sp_sparse_tensor, self,
&out_data, &out_indptr, &out_indices))
&out_data, &out_indptr,
&out_indices))
return (PyObject_to_object(out_data), PyObject_to_object(out_indptr),
PyObject_to_object(out_indices))

def to_tensor(self):
    """
    Convert this arrow::SparseCSRMatrix to a dense arrow::Tensor.

    The C++ status returned by the conversion is passed to check_status
    before the result is wrapped.
    """
    cdef shared_ptr[CTensor] c_dense

    check_status(self.stp.ToTensor(&c_dense))
    return pyarrow_wrap_tensor(c_dense)

def equals(self, SparseCSRMatrix other):
"""
Return true if sparse tensors contains exactly equal data
Expand Down
21 changes: 21 additions & 0 deletions python/pyarrow/tests/test_sparse_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,3 +219,24 @@ def test_sparse_tensor_csr_numpy_roundtrip(dtype_str, arrow_type):
assert np.array_equal(indptr, result_indptr)
assert np.array_equal(indices, result_indices)
assert sparse_tensor.dim_names == dim_names


@pytest.mark.parametrize('sparse_tensor_type', [
    pa.SparseCSRMatrix,
    pa.SparseCOOTensor,
])
@pytest.mark.parametrize('dtype_str,arrow_type', tensor_type_pairs)
def test_dense_to_sparse_tensor(dtype_str, arrow_type, sparse_tensor_type):
    """Dense ndarray -> sparse tensor -> dense Tensor -> ndarray round trip.

    Checks that both the sparse and the densified tensor report the expected
    Arrow type and that the final array equals the input.
    """
    rows = [[4, 0, 9, 0],
            [0, 7, 0, 0],
            [0, 0, 0, 0],
            [0, 0, 0, 5]]
    expected = np.array(rows).astype(np.dtype(dtype_str))

    sparse = sparse_tensor_type.from_dense_numpy(expected)
    dense = sparse.to_tensor()
    actual = dense.to_numpy()

    assert sparse.type == arrow_type
    assert dense.type == arrow_type
    assert np.array_equal(expected, actual)