From 2d10104050604826f1b9d7077fd39da9129ae8be Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Fri, 8 Nov 2019 01:03:44 +0100
Subject: [PATCH 01/18] WIP

---
 cpp/src/arrow/sparse_tensor.cc      | 83 ++++++++++++++++++++++++++---
 cpp/src/arrow/sparse_tensor.h       | 65 ++++++++++++++++++++++
 cpp/src/arrow/sparse_tensor_test.cc | 72 +++++++++++++++++++++++++
 format/SparseTensor.fbs             | 13 +++++
 4 files changed, 225 insertions(+), 8 deletions(-)
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index d42bdf4ca61..ad528f0cf4d 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -507,6 +507,33 @@ Status MakeSparseTensorFromTensor(const Tensor& tensor,
   }
 }
 
+template <typename TYPE, typename IndexValueType>
+void assign_values(int64_t dimension_index, int64_t offset, int64_t first_ptr,
+                   int64_t last_ptr, const SparseCSFIndex* sparse_index,
+                   const int64_t* raw_data, const std::vector<int64_t> strides,
+                   TYPE* out) {
+  auto indices_offset = sparse_index->indices_offsets()[dimension_index];
+  auto indptr_offset = sparse_index->indptr_offsets()[dimension_index];
+  int64_t ndim = sparse_index->indices_offsets().size();
+
+  if (dimension_index == 0 && ndim > 1)
+    last_ptr = sparse_index->indptr_offsets()[dimension_index + 1] - 1;
+
+  for (int64_t i = first_ptr; i < last_ptr; ++i) {
+    int64_t tmp_offset =
+        offset + sparse_index->indices()->Value<IndexValueType>({indices_offset + i}) *
+                     strides[dimension_index];
+    if (dimension_index < ndim - 1)
+      assign_values<TYPE, IndexValueType>(
+          dimension_index + 1, tmp_offset,
+          sparse_index->indptr()->Value<IndexValueType>({indptr_offset + i}),
+          sparse_index->indptr()->Value<IndexValueType>({indptr_offset + i + 1}),
+          sparse_index, raw_data, strides, out);
+    else
+      out[tmp_offset] = raw_data[i];
+  }
+}
+
 template <typename TYPE, typename IndexValueType>
 Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_tensor,
                                   std::shared_ptr<Tensor>* out) {
@@ -521,18 +548,18 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
 
   std::fill_n(values, sparse_tensor->size(), static_cast<value_type>(0));
 
+  std::vector<int64_t> strides(sparse_tensor->ndim(), 1);
+  for (int i = sparse_tensor->ndim() - 1; i > 0; --i)
+    strides[i - 1] *= strides[i] * sparse_tensor->shape()[i];
+
+  const auto raw_data = reinterpret_cast<const value_type*>(sparse_tensor->raw_data());
+
   switch (sparse_tensor->format_id()) {
     case SparseTensorFormat::COO: {
       const auto& sparse_index =
           internal::checked_cast<const SparseCOOIndex&>(*sparse_tensor->sparse_index());
       const std::shared_ptr<const Tensor> coords = sparse_index.indices();
-      const auto raw_data =
-          reinterpret_cast<const value_type*>(sparse_tensor->raw_data());
-      std::vector<int64_t> strides(sparse_tensor->ndim(), 1);
 
-      for (int i = sparse_tensor->ndim() - 1; i > 0; --i) {
-        strides[i - 1] *= strides[i] * sparse_tensor->shape()[i];
-      }
       for (int64_t i = 0; i < sparse_tensor->non_zero_length(); ++i) {
         std::vector<c_index_value_type> coord(sparse_tensor->ndim());
         int64_t offset = 0;
@@ -552,8 +579,6 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
           internal::checked_cast<const SparseCSRIndex&>(*sparse_tensor->sparse_index());
       const std::shared_ptr<const Tensor> indptr = sparse_index.indptr();
       const std::shared_ptr<const Tensor> indices = sparse_index.indices();
-      const auto raw_data =
-          reinterpret_cast<const value_type*>(sparse_tensor->raw_data());
 
       int64_t offset;
       for (int64_t i = 0; i < indptr->size() - 1; ++i) {
@@ -590,6 +615,17 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
                                       sparse_tensor->shape());
       return Status::OK();
     }
+
+    case SparseTensorFormat::CSF: {
+      const auto& sparse_index =
+          internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index());
+      assign_values<value_type, IndexValueType>(
+          0, 0, 0, 0, &sparse_index,
+          reinterpret_cast<const int64_t*>(sparse_tensor->raw_data()), strides, values);
+      *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
+                                      sparse_tensor->shape());
+      return Status::OK();
+    }
   }
   return Status::NotImplemented("Unsupported SparseIndex format type");
 }
@@ -625,6 +661,13 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
       const std::shared_ptr<const Tensor> indices = sparse_index.indices();
       type = indices->type();
       break;
+    }
+    case SparseTensorFormat::CSF: {
+      const auto& sparse_index =
+          internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index());
+      const std::shared_ptr<const Tensor> indices = sparse_index.indices();
+      type = indices->type();
+      break;
     }
       // LCOV_EXCL_START: ignore program failure
     default:
@@ -754,6 +797,30 @@ void CheckSparseCSXIndexValidity(const std::shared_ptr<DataType>& indptr_type,
 
 }  // namespace internal
 
+// ----------------------------------------------------------------------
+// SparseCSFIndex
+
+// Constructor with two index vectors
+SparseCSFIndex::SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
+                               const std::shared_ptr<Tensor>& indices,
+                               const std::vector<int64_t>& indptr_offsets,
+                               const std::vector<int64_t>& indices_offsets,
+                               const std::vector<int64_t>& axis_order)
+    : SparseIndexBase(indices->shape()[0] - indices_offsets.back()),
+      indptr_(indptr),
+      indices_(indices),
+      indptr_offsets_(indptr_offsets),
+      indices_offsets_(indices_offsets),
+      axis_order_(axis_order) {
+  ARROW_CHECK(is_integer(indptr_->type_id()));
+  ARROW_CHECK_EQ(1, indptr_->ndim());
+  ARROW_CHECK(is_integer(indices_->type_id()));
+  ARROW_CHECK_EQ(1, indices_->ndim());
+  ARROW_CHECK_EQ(indptr_offsets_.size() + 1, indices_offsets_.size());
+}
+
+std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); }
+
 // ----------------------------------------------------------------------
 // SparseTensor
 
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index f736f7b7576..cdcbbe1ddc4 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -40,6 +40,8 @@ struct SparseTensorFormat {
     CSR,
     /// Compressed sparse column (CSC) format.
     CSC,
+    /// Compressed sparse fiber (CSF) format.
+    CSF
   };
 };
 
@@ -329,6 +331,66 @@ class ARROW_EXPORT SparseCSCIndex
   using SparseCSXIndex::SparseCSXIndex;
 };
 
+// ----------------------------------------------------------------------
+// SparseCSFIndex class
+
+/// \brief EXPERIMENTAL: The index data for a CSF sparse tensor
+///
+/// A CSF sparse index manages the location of its non-zero values by two
+/// vectors.
+/// TODO:rok, documentation
+/// The first vector, called indptr, represents the range of the rows; the i-th
+/// row spans from indptr[i] to indptr[i+1] in the corresponding value vector.
+/// So the length of an indptr vector is the number of rows + 1.
+///
+/// The other vector, called indices, represents the column indices of the
+/// corresponding non-zero values.  So the length of an indices vector is same
+/// as the number of non-zero-values.
+class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIndex> {
+ public:
+  static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSF;
+
+  /// \brief Construct SparseCSFIndex from two index vectors
+  explicit SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
+                          const std::shared_ptr<Tensor>& indices,
+                          const std::vector<int64_t>& indptr_offsets,
+                          const std::vector<int64_t>& indices_offsets,
+                          const std::vector<int64_t>& axis_order);
+
+  /// \brief Return a 1D tensor of indptr vector
+  const std::shared_ptr<Tensor>& indptr() const { return indptr_; }
+
+  /// \brief Return a 1D tensor of indices vector
+  const std::shared_ptr<Tensor>& indices() const { return indices_; }
+
+  /// \brief Return a 1D vector of indptr offsets
+  const std::vector<int64_t>& indptr_offsets() const { return indptr_offsets_; }
+
+  /// \brief Return a vector of indices offsets
+  const std::vector<int64_t>& indices_offsets() const { return indices_offsets_; }
+
+  /// \brief Return a 1D vector specifying the order of axes
+  const std::vector<int64_t>& axis_order() const { return axis_order_; }
+
+  /// \brief Return a string representation of the sparse index
+  std::string ToString() const override;
+
+  /// \brief Return whether the CSF indices are equal
+  bool Equals(const SparseCSFIndex& other) const {
+    return indptr()->Equals(*other.indptr()) && indices()->Equals(*other.indices()) &&
+           indptr_offsets() == other.indptr_offsets() &&
+           indices_offsets() == other.indices_offsets() &&
+           axis_order() == other.axis_order();
+  }
+
+ protected:
+  std::shared_ptr<Tensor> indptr_;
+  std::shared_ptr<Tensor> indices_;
+  std::vector<int64_t> indptr_offsets_;
+  std::vector<int64_t> indices_offsets_;
+  std::vector<int64_t> axis_order_;
+};
+
 // ----------------------------------------------------------------------
 // SparseTensor class
 
@@ -527,6 +589,9 @@ using SparseCSRMatrix = SparseTensorImpl<SparseCSRIndex>;
 /// \brief EXPERIMENTAL: Type alias for CSC sparse matrix
 using SparseCSCMatrix = SparseTensorImpl<SparseCSCIndex>;
 
+/// \brief EXPERIMENTAL: Type alias for CSF sparse tensor
+using SparseCSFTensor = SparseTensorImpl<SparseCSFIndex>;
+
 }  // namespace arrow
 
 #endif  // ARROW_SPARSE_TENSOR_H
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index 198aa8f5f8d..91588ac27a7 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -910,4 +910,76 @@ TEST_F(TestSparseCSCMatrix, TestToTensor) {
   ASSERT_TRUE(tensor.Equals(*dense_tensor));
 }
 
+template <typename IndexValueType>
+class TestSparseCSFTensorBase : public ::testing::Test {
+public:
+    void SetUp() {
+        shape_ = {6, 4};
+        dim_names_ = {"foo", "bar"};
+
+        // Dense representation:
+        // [
+        //    1  0  2  0
+        //    0  3  0  4
+        //    5  0  6  0
+        //    0 11  0 12
+        //   13  0 14  0
+        //    0 15  0 16
+        // ]
+        std::vector<int64_t> dense_values = {1, 0,  2, 0,  0,  3, 0,  4, 5, 0,  6, 0,
+                                             0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
+        auto dense_data = Buffer::Wrap(dense_values);
+        NumericTensor<Int64Type> dense_tensor(dense_data, shape_, {}, dim_names_);
+    }
+
+protected:
+    std::vector<int64_t> shape_;
+    std::vector<std::string> dim_names_;
+    std::shared_ptr<SparseCSFTensor> sparse_tensor_from_dense_;
+};
+
+class TestSparseCSFTensor : public TestSparseCSFTensorBase<Int64Type> {};
+
+TEST_F(TestSparseCSFTensor, TestToTensor) {
+  std::vector<int64_t> data_values = {1, 2, 3, 4, 5, 6, 7, 8};
+  std::vector<int64_t> indptr_values = {0, 2, 3, 0, 1, 3, 4, 0, 2, 4, 5, 8};
+  std::vector<int64_t> indices_values = {1, 2, 1, 2, 2, 1, 1, 2, 2,
+                                         2, 3, 1, 3, 1, 1, 2, 3};
+  std::vector<int64_t> indices_offsets = {0, 2, 5, 9};
+  std::vector<int64_t> indptr_offsets = {0, 3, 7};
+  std::vector<int64_t> axis_order = {0, 1, 2, 3};
+  std::vector<int64_t> sparse_tensor_shape({3, 3, 3, 4});
+  std::vector<int64_t> indptr_shape({12});
+  std::vector<int64_t> indices_shape({17});
+  std::vector<std::string> dim_names({"a", "b", "c", "d"});
+
+  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
+  std::shared_ptr<Buffer> indptr_buffer = Buffer::Wrap(indptr_values);
+  std::shared_ptr<Buffer> indices_buffer = Buffer::Wrap(indices_values);
+
+  std::shared_ptr<Tensor> indptr =
+      std::make_shared<Tensor>(int64(), indptr_buffer, indptr_shape);
+  std::shared_ptr<Tensor> indices =
+      std::make_shared<Tensor>(int64(), indices_buffer, indices_shape);
+
+  std::shared_ptr<SparseCSFIndex> sparse_index = std::make_shared<SparseCSFIndex>(
+      indptr, indices, indptr_offsets, indices_offsets, axis_order);
+  std::shared_ptr<SparseCSFTensor> sparse_tensor = std::make_shared<SparseCSFTensor>(
+      sparse_index, int64(), data_buffer, sparse_tensor_shape, dim_names);
+
+  ASSERT_EQ(8, sparse_tensor->non_zero_length());
+
+  std::shared_ptr<Tensor> dense_tensor;
+  ASSERT_OK(sparse_tensor->ToTensor(&dense_tensor));
+
+  std::vector<int64_t> dense_values = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
+  auto dense_data = Buffer::Wrap(dense_values);
+  Tensor tensor(int64(), dense_data, sparse_tensor_shape, {});
+
+  ASSERT_TRUE(tensor.Equals(*dense_tensor));
+}
 }  // namespace arrow
diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index 1de67eed19a..b22c8c718a2 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -114,9 +114,22 @@ table SparseMatrixIndexCSX {
   indicesBuffer: Buffer (required);
 }
 
+/// Compressed Sparse Fiber (CSF) sparse tensor format
+///
+/// CSF is a generalization of compressed sparse row (CSR) index.
+/// CSF compresses a tensor into one three one-dimensional tensors.
+table SparseTensorIndexCSF {
+  indptrType: Int;
+  indptrBuffer: Buffer;
+  indicesType: Int;
+  indicesBuffer: Buffer;
+  axisOrder: [long];
+}
+
 union SparseTensorIndex {
   SparseTensorIndexCOO,
   SparseMatrixIndexCSX,
+  SparseTensorIndexCSF
 }
 
 table SparseTensor {

From 6b938f7daf0255d52cb99d3775239568cfc027c1 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Sat, 16 Nov 2019 22:46:59 +0100
Subject: [PATCH 02/18] Documentation.

---
 cpp/src/arrow/python/serialize.cc |  4 ++
 cpp/src/arrow/sparse_tensor.cc    |  7 +++-
 cpp/src/arrow/sparse_tensor.h     | 17 ++++----
 format/SparseTensor.fbs           | 69 +++++++++++++++++++++++++++++--
 4 files changed, 83 insertions(+), 14 deletions(-)

diff --git a/cpp/src/arrow/python/serialize.cc b/cpp/src/arrow/python/serialize.cc
index 09a322b1060..88d763b7877 100644
--- a/cpp/src/arrow/python/serialize.cc
+++ b/cpp/src/arrow/python/serialize.cc
@@ -654,6 +654,7 @@ Status CountSparseTensors(
   OwnedRef num_sparse_tensors(PyDict_New());
   size_t num_coo = 0;
   size_t num_csr = 0;
+  size_t num_csf = 0;
 
   for (const auto& sparse_tensor : sparse_tensors) {
     switch (sparse_tensor->format_id()) {
@@ -665,12 +666,15 @@ Status CountSparseTensors(
         break;
       case SparseTensorFormat::CSC:
         // TODO(mrkn): support csc
+      case SparseTensorFormat::CSF:
+        ++num_csf;
         break;
     }
   }
 
   PyDict_SetItemString(num_sparse_tensors.obj(), "coo", PyLong_FromSize_t(num_coo));
   PyDict_SetItemString(num_sparse_tensors.obj(), "csr", PyLong_FromSize_t(num_csr));
+  PyDict_SetItemString(num_sparse_tensors.obj(), "csf", PyLong_FromSize_t(num_csf));
   RETURN_IF_PYERROR();
 
   *out = num_sparse_tensors.detach();
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index ad528f0cf4d..19f0a9edb62 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -438,6 +438,7 @@ class SparseTensorConverter<TYPE, SparseCSCIndex>
 INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCOOIndex);
 INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCSRIndex);
 INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCSCIndex);
+INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCSFIndex);
 
 }  // namespace
 
@@ -500,6 +501,9 @@ Status MakeSparseTensorFromTensor(const Tensor& tensor,
     case SparseTensorFormat::CSC:
       return MakeSparseTensorFromTensor<SparseCSCIndex>(tensor, index_value_type, pool,
                                                         out_sparse_index, out_data);
+    case SparseTensorFormat::CSF:
+      return Status::Invalid("Unsupported Tensor value type");
+
     // LCOV_EXCL_START: ignore program failure
     default:
       return Status::Invalid("Invalid sparse tensor format");
@@ -530,7 +534,7 @@ void assign_values(int64_t dimension_index, int64_t offset, int64_t first_ptr,
           sparse_index->indptr()->Value<IndexValueType>({indptr_offset + i + 1}),
           sparse_index, raw_data, strides, out);
     else
-      out[tmp_offset] = raw_data[i];
+      out[tmp_offset] = static_cast<TYPE>(raw_data[i]);
   }
 }
 
@@ -817,6 +821,7 @@ SparseCSFIndex::SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
   ARROW_CHECK(is_integer(indices_->type_id()));
   ARROW_CHECK_EQ(1, indices_->ndim());
   ARROW_CHECK_EQ(indptr_offsets_.size() + 1, indices_offsets_.size());
+  ARROW_CHECK_EQ(axis_order_.size(), indices_offsets_.size());
 }
 
 std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); }
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index cdcbbe1ddc4..a8e38b89ad2 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -336,16 +336,15 @@ class ARROW_EXPORT SparseCSCIndex
 
 /// \brief EXPERIMENTAL: The index data for a CSF sparse tensor
 ///
-/// A CSF sparse index manages the location of its non-zero values by two
-/// vectors.
-/// TODO:rok, documentation
-/// The first vector, called indptr, represents the range of the rows; the i-th
-/// row spans from indptr[i] to indptr[i+1] in the corresponding value vector.
-/// So the length of an indptr vector is the number of rows + 1.
+/// A CSF sparse index manages the location of its non-zero values by a set of
+/// prefix trees. Each path from a root to leaf forms one tensor non-zero index.
+/// CSF is implemented with five vectors.
 ///
-/// The other vector, called indices, represents the column indices of the
-/// corresponding non-zero values.  So the length of an indices vector is same
-/// as the number of non-zero-values.
+/// Vectors indptr and indices are split into N-1 segments (by indptr_offsets) and
+/// N segments (by indices_offsets) respectively, where N is the number of dimensions.
+/// Indptr and indices segments describe the set of prefix trees.
+///
+/// Trees traverse dimensions in order given by axis_order.
 class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIndex> {
  public:
   static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSF;
diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index b22c8c718a2..12e9f870376 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -114,15 +114,76 @@ table SparseMatrixIndexCSX {
   indicesBuffer: Buffer (required);
 }
 
-/// Compressed Sparse Fiber (CSF) sparse tensor format
-///
-/// CSF is a generalization of compressed sparse row (CSR) index.
-/// CSF compresses a tensor into one three one-dimensional tensors.
+/// Compressed Sparse Fiber (CSF) sparse tensor index.
 table SparseTensorIndexCSF {
+  /// CSF is a generalization of compressed sparse row (CSR) index.
+  /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf
+  ///
+  /// CSF recursively compresses each mode of the tensor into a set
+  /// of prefix trees. Each path from a root to leaf forms one tensor
+  /// non-zero index. CSF is implemented with two buffers and three arrays.
+  ///
+  /// For example, let X be a 3x3x3x4 tensor, and it has the following
+  /// 8 non-zero values:
+  ///
+  ///   X[1, 1, 1, 2] := 1
+  ///   X[1, 1, 1, 3] := 2
+  ///   X[1, 2, 1, 1] := 3
+  ///   X[1, 2, 1, 3] := 4
+  ///   X[1, 2, 2, 1] := 5
+  ///   X[2, 2, 2, 1] := 6
+  ///   X[2, 2, 2, 2] := 7
+  ///   X[2, 2, 2, 3] := 8
+  ///
+  /// As a prefix tree this would be represented as:
+  ///
+  ///         1          2
+  ///        / \         |
+  ///       1   2        2
+  ///      /   / \       |
+  ///     1   1   2      2
+  ///    / \ / \   \    /|\
+  ///   2  3 1  3   1  1 2 3
+
+  /// The type of values in indptrBuffer
   indptrType: Int;
+
+  /// indptrBuffer stores the sparsity structure.
+  /// For example, the indptrBuffer for the above X is:
+  ///
+  ///   indptrBuffer(X) = [0, 2, 3, 0, 1, 3, 4, 0, 2, 4, 5, 8].
+  ///
   indptrBuffer: Buffer;
+
+  /// indptrOffsets stores per dimension offset in indptrBuffer.
+  /// For example, the indptrOffsets for the above X is:
+  ///
+  ///   indptrOffsets(X) = [0, 3, 7].
+  ///
+  indptrOffsets: [int];
+
+  /// The type of values in indicesBuffer
   indicesType: Int;
+
+  /// indicesBuffer stores the label of each node.
+  /// For example, the indicesBuffer for the above X is:
+  ///
+  ///   indicesBuffer(X) = [1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 3, 1, 3, 1, 1, 2, 3].
+  ///
   indicesBuffer: Buffer;
+
+  /// indicesOffsets stores per dimension offset in indicesBuffer.
+  /// For example, the indicesOffsets for the above X is:
+  ///
+  ///   indicesOffsets(X) = [0, 2, 5, 9].
+  ///
+  indicesOffsets: [int];
+
+  /// axisOrder stores the sequence in which dimensions were traversed.
+  /// For example, the axisOrder for the above X is:
+  ///
+  ///   axisOrder(X) = [0, 1, 2, 3].
+  ///
   axisOrder: [long];
 }
 

From 05a47a546f9e91a086af81521155f98cc9367ffd Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Mon, 18 Nov 2019 17:46:34 +0100
Subject: [PATCH 03/18] Using axis_order in CSF.

---
 cpp/src/arrow/sparse_tensor.cc | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 19f0a9edb62..f3cfd42b1be 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -515,24 +515,25 @@ template <typename TYPE, typename IndexValueType>
 void assign_values(int64_t dimension_index, int64_t offset, int64_t first_ptr,
                    int64_t last_ptr, const SparseCSFIndex* sparse_index,
                    const int64_t* raw_data, const std::vector<int64_t> strides,
-                   TYPE* out) {
-  auto indices_offset = sparse_index->indices_offsets()[dimension_index];
-  auto indptr_offset = sparse_index->indptr_offsets()[dimension_index];
+                   const std::vector<int64_t> axis_order, TYPE* out) {
+  auto dimension = axis_order[dimension_index];
+  auto indices_offset = sparse_index->indices_offsets()[dimension];
+  auto indptr_offset = sparse_index->indptr_offsets()[dimension];
   int64_t ndim = sparse_index->indices_offsets().size();
 
-  if (dimension_index == 0 && ndim > 1)
-    last_ptr = sparse_index->indptr_offsets()[dimension_index + 1] - 1;
+  if (dimension == 0 && ndim > 1)
+    last_ptr = sparse_index->indptr_offsets()[dimension + 1] - 1;
 
   for (int64_t i = first_ptr; i < last_ptr; ++i) {
     int64_t tmp_offset =
         offset + sparse_index->indices()->Value<IndexValueType>({indices_offset + i}) *
-                     strides[dimension_index];
+                     strides[dimension];
     if (dimension_index < ndim - 1)
       assign_values<TYPE, IndexValueType>(
-          dimension_index + 1, tmp_offset,
+          dimension + 1, tmp_offset,
           sparse_index->indptr()->Value<IndexValueType>({indptr_offset + i}),
           sparse_index->indptr()->Value<IndexValueType>({indptr_offset + i + 1}),
-          sparse_index, raw_data, strides, out);
+          sparse_index, raw_data, strides, axis_order, out);
     else
       out[tmp_offset] = static_cast<TYPE>(raw_data[i]);
   }
@@ -625,7 +626,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
           internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index());
       assign_values<value_type, IndexValueType>(
           0, 0, 0, 0, &sparse_index,
-          reinterpret_cast<const int64_t*>(sparse_tensor->raw_data()), strides, values);
+          reinterpret_cast<const int64_t*>(sparse_tensor->raw_data()), strides,
+          sparse_index.axis_order(), values);
       *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
                                       sparse_tensor->shape());
       return Status::OK();

From 7d17995a47b0699326d176645fa0da522085309f Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Mon, 25 Nov 2019 00:20:39 +0100
Subject: [PATCH 04/18] Adding Tensor to SparseCSFTensor conversion.

---
 cpp/src/arrow/sparse_tensor.cc      | 153 +++++++++++++++++++++++++++-
 cpp/src/arrow/sparse_tensor_test.cc |  28 +++++
 2 files changed, 179 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index f3cfd42b1be..f8f8bf425e3 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -419,6 +419,154 @@ class SparseTensorConverter<TYPE, SparseCSCIndex>
   inline Status CheckMaximumValue(const uint64_t) const { return Status::OK(); }
 };
 
+// ----------------------------------------------------------------------
+// SparseTensorConverter for SparseCSFIndex
+
+template <typename TYPE>
+class SparseTensorConverter<TYPE, SparseCSFIndex>
+    : private SparseTensorConverterBase<TYPE> {
+ public:
+  using BaseClass = SparseTensorConverterBase<TYPE>;
+  using typename BaseClass::NumericTensorType;
+  using typename BaseClass::value_type;
+
+  SparseTensorConverter(const NumericTensorType& tensor,
+                        const std::shared_ptr<DataType>& index_value_type,
+                        MemoryPool* pool)
+      : BaseClass(tensor, index_value_type, pool) {}
+
+  template <typename IndexValueType>
+  Status Convert() {
+    using c_index_value_type = typename IndexValueType::c_type;
+    const int64_t indices_elsize = sizeof(c_index_value_type);
+
+    std::shared_ptr<SparseCOOTensor> sparse_coo_tensor;
+    RETURN_NOT_OK(SparseCOOTensor::Make(tensor_, &sparse_coo_tensor));
+    std::shared_ptr<Tensor> coords =
+        arrow::internal::checked_pointer_cast<SparseCOOIndex>(
+            sparse_coo_tensor->sparse_index())
+            ->indices();
+
+    // Convert SparseCOOTensor to long CSF buffers
+    const int64_t ndim = tensor_.ndim();
+    const int64_t nonzero_count = sparse_coo_tensor->non_zero_length();
+
+    std::vector<int64_t> counts(ndim);
+    std::fill_n(counts.begin(), ndim, static_cast<int64_t>(0));
+
+    std::vector<int64_t> axis_order(ndim);
+    for (int64_t i = 0; i < ndim; ++i) axis_order[i] = i;
+
+    std::shared_ptr<Buffer> indices_buffer;
+    std::shared_ptr<Buffer> indptr_buffer;
+    RETURN_NOT_OK(
+        AllocateBuffer(pool_, indices_elsize * ndim * nonzero_count, &indices_buffer));
+    RETURN_NOT_OK(AllocateBuffer(pool_, indices_elsize * (ndim - 1) * (nonzero_count + 1),
+                                 &indptr_buffer));
+    int64_t* indices = reinterpret_cast<int64_t*>(indices_buffer->mutable_data());
+    int64_t* indptr = reinterpret_cast<int64_t*>(indptr_buffer->mutable_data());
+
+    for (int64_t row = 0; row < nonzero_count; ++row) {
+      bool tree_split = false;
+      for (int64_t column = 0; column < ndim; ++column) {
+        bool change = coords->Value<IndexValueType>({row, column}) !=
+                      coords->Value<IndexValueType>({row - 1, column});
+
+        if (tree_split || change || row == 0) {
+          if (row > 1) tree_split = true;
+
+          indices[column * nonzero_count + counts[column]] =
+              coords->Value<IndexValueType>({row, column});
+          indptr[column * (nonzero_count + 1) + counts[column]] = counts[column + 1];
+          ++counts[column];
+        }
+      }
+    }
+
+    for (int64_t column = 0; column < ndim; ++column) {
+      indptr[column * (nonzero_count + 1) + counts[column]] = counts[column + 1];
+    }
+
+    int64_t total_size = counts[0];
+    for (int64_t column = 1; column < ndim; ++column) {
+      for (int64_t i = 0; i < counts[column] + 1; ++i) {
+        if (column < ndim - 1)
+          indptr[total_size + column + i] = indptr[column * (nonzero_count + 1) + i];
+        if (i < counts[column])
+          indices[total_size + i] = indices[column * nonzero_count + i];
+      }
+      total_size += counts[column];
+    }
+
+    // Copy CSF index data into smaller buffers
+    std::shared_ptr<Buffer> out_indices_buffer;
+    std::shared_ptr<Buffer> out_indptr_buffer;
+    RETURN_NOT_OK(
+        AllocateBuffer(pool_, indices_elsize * total_size, &out_indices_buffer));
+    RETURN_NOT_OK(AllocateBuffer(pool_,
+                                 indices_elsize * total_size - nonzero_count + ndim - 1,
+                                 &out_indptr_buffer));
+    int64_t* out_indices = reinterpret_cast<int64_t*>(out_indices_buffer->mutable_data());
+    int64_t* out_indptr = reinterpret_cast<int64_t*>(out_indptr_buffer->mutable_data());
+
+    for (int64_t i = 0; i < total_size; ++i) out_indices[i] = indices[i];
+
+    for (int64_t i = 0; i < total_size - nonzero_count + ndim - 1; ++i)
+      out_indptr[i] = indptr[i];
+
+    // Construct SparseCSFTensor
+    std::vector<int64_t> out_indptr_shape({total_size - nonzero_count + ndim - 1});
+    std::shared_ptr<Tensor> out_indptr_tensor =
+        std::make_shared<Tensor>(int64(), out_indptr_buffer, out_indptr_shape);
+
+    std::vector<int64_t> out_indices_shape({total_size});
+    std::shared_ptr<Tensor> out_indices_tensor =
+        std::make_shared<Tensor>(int64(), out_indices_buffer, out_indices_shape);
+
+    std::vector<int64_t> indptr_offsets(ndim - 1);
+    std::vector<int64_t> indices_offsets(ndim);
+    std::fill_n(indptr_offsets.begin(), ndim - 1, static_cast<int64_t>(0));
+    std::fill_n(indices_offsets.begin(), ndim, static_cast<int64_t>(0));
+
+    for (int64_t i = 0; i < ndim - 2; ++i)
+      indptr_offsets[i + 1] = indptr_offsets[i] + counts[i] + 1;
+
+    for (int64_t i = 0; i < ndim; ++i)
+      indices_offsets[i + 1] = indices_offsets[i] + counts[i];
+
+    sparse_index =
+        std::make_shared<SparseCSFIndex>(out_indptr_tensor, out_indices_tensor,
+                                         indptr_offsets, indices_offsets, axis_order);
+    data = sparse_coo_tensor->data();
+
+    return Status::OK();
+  }
+
+#define CALL_TYPE_SPECIFIC_CONVERT(TYPE_CLASS) \
+  case TYPE_CLASS##Type::type_id:              \
+    return Convert<TYPE_CLASS##Type>();
+
+  Status Convert() {
+    switch (index_value_type_->id()) {
+      ARROW_GENERATE_FOR_ALL_INTEGER_TYPES(CALL_TYPE_SPECIFIC_CONVERT);
+      // LCOV_EXCL_START: The following invalid causes program failure.
+      default:
+        return Status::TypeError("Unsupported SparseTensor index value type");
+        // LCOV_EXCL_STOP
+    }
+  }
+
+#undef CALL_TYPE_SPECIFIC_CONVERT
+
+  std::shared_ptr<SparseCSFIndex> sparse_index;
+  std::shared_ptr<Buffer> data;
+
+ private:
+  using BaseClass::index_value_type_;
+  using BaseClass::pool_;
+  using BaseClass::tensor_;
+};
+
 // ----------------------------------------------------------------------
 // Instantiate templates
 
@@ -502,7 +650,8 @@ Status MakeSparseTensorFromTensor(const Tensor& tensor,
       return MakeSparseTensorFromTensor<SparseCSCIndex>(tensor, index_value_type, pool,
                                                         out_sparse_index, out_data);
     case SparseTensorFormat::CSF:
-      return Status::Invalid("Unsupported Tensor value type");
+      return MakeSparseTensorFromTensor<SparseCSFIndex>(tensor, index_value_type, pool,
+                                                        out_sparse_index, out_data);
 
     // LCOV_EXCL_START: ignore program failure
     default:
@@ -812,7 +961,7 @@ SparseCSFIndex::SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
                                const std::vector<int64_t>& indptr_offsets,
                                const std::vector<int64_t>& indices_offsets,
                                const std::vector<int64_t>& axis_order)
-    : SparseIndexBase(indices->shape()[0] - indices_offsets.back()),
+    : SparseIndexBase(indices->size() - indices_offsets.back()),
       indptr_(indptr),
       indices_(indices),
       indptr_offsets_(indptr_offsets),
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index 91588ac27a7..5496df8b003 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -982,4 +982,32 @@ TEST_F(TestSparseCSFTensor, TestToTensor) {
 
   ASSERT_TRUE(tensor.Equals(*dense_tensor));
 }
+
+TEST_F(TestSparseCSFTensor, CreationFromTensor) {
+  std::vector<int64_t> values = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
+  std::vector<int64_t> shape({3, 3, 3, 4});
+  std::vector<std::string> dim_names({"a", "b", "c", "d"});
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, {}, dim_names);
+
+  std::shared_ptr<SparseCSFTensor> st;
+  ASSERT_OK(SparseCSFTensor::Make(tensor, &st));
+
+  ASSERT_EQ(8, st->non_zero_length());
+  ASSERT_TRUE(st->is_mutable());
+
+  ASSERT_EQ(dim_names, st->dim_names());
+  ASSERT_EQ("a", st->dim_name(0));
+  ASSERT_EQ("b", st->dim_name(1));
+  ASSERT_EQ("c", st->dim_name(2));
+  ASSERT_EQ("d", st->dim_name(3));
+
+  std::shared_ptr<Tensor> dt;
+  ASSERT_OK(st->ToTensor(&dt));
+  ASSERT_TRUE(tensor.Equals(*dt));
+}
 }  // namespace arrow

From f44d92cfd5c4a36b173f9a84e1c42ab8986c233f Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Mon, 25 Nov 2019 03:49:44 +0100
Subject: [PATCH 05/18] Adding SparseCSFIndex::Make.

---
 cpp/src/arrow/sparse_tensor.cc      | 63 ++++++++++++++++++++------
 cpp/src/arrow/sparse_tensor.h       | 11 +++++
 cpp/src/arrow/sparse_tensor_test.cc | 68 +++++++++++++++--------------
 3 files changed, 96 insertions(+), 46 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index f8f8bf425e3..e67fbfb3ebb 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -438,6 +438,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
   template <typename IndexValueType>
   Status Convert() {
     using c_index_value_type = typename IndexValueType::c_type;
+    RETURN_NOT_OK(CheckMaximumValue(std::numeric_limits<c_index_value_type>::max()));
     const int64_t indices_elsize = sizeof(c_index_value_type);
 
     std::shared_ptr<SparseCOOTensor> sparse_coo_tensor;
@@ -463,8 +464,8 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
         AllocateBuffer(pool_, indices_elsize * ndim * nonzero_count, &indices_buffer));
     RETURN_NOT_OK(AllocateBuffer(pool_, indices_elsize * (ndim - 1) * (nonzero_count + 1),
                                  &indptr_buffer));
-    int64_t* indices = reinterpret_cast<int64_t*>(indices_buffer->mutable_data());
-    int64_t* indptr = reinterpret_cast<int64_t*>(indptr_buffer->mutable_data());
+    auto* indices = reinterpret_cast<c_index_value_type*>(indices_buffer->mutable_data());
+    auto* indptr = reinterpret_cast<c_index_value_type*>(indptr_buffer->mutable_data());
 
     for (int64_t row = 0; row < nonzero_count; ++row) {
       bool tree_split = false;
@@ -477,16 +478,19 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
 
           indices[column * nonzero_count + counts[column]] =
               coords->Value<IndexValueType>({row, column});
-          indptr[column * (nonzero_count + 1) + counts[column]] = counts[column + 1];
+          indptr[column * (nonzero_count + 1) + counts[column]] =
+              static_cast<c_index_value_type>(counts[column + 1]);
           ++counts[column];
         }
       }
     }
 
     for (int64_t column = 0; column < ndim; ++column) {
-      indptr[column * (nonzero_count + 1) + counts[column]] = counts[column + 1];
+      indptr[column * (nonzero_count + 1) + counts[column]] =
+          static_cast<c_index_value_type>(counts[column + 1]);
     }
 
+    // Remove gaps from buffers
     int64_t total_size = counts[0];
     for (int64_t column = 1; column < ndim; ++column) {
       for (int64_t i = 0; i < counts[column] + 1; ++i) {
@@ -506,8 +510,10 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
     RETURN_NOT_OK(AllocateBuffer(pool_,
                                  indices_elsize * total_size - nonzero_count + ndim - 1,
                                  &out_indptr_buffer));
-    int64_t* out_indices = reinterpret_cast<int64_t*>(out_indices_buffer->mutable_data());
-    int64_t* out_indptr = reinterpret_cast<int64_t*>(out_indptr_buffer->mutable_data());
+    auto* out_indices =
+        reinterpret_cast<c_index_value_type*>(out_indices_buffer->mutable_data());
+    auto* out_indptr =
+        reinterpret_cast<c_index_value_type*>(out_indptr_buffer->mutable_data());
 
     for (int64_t i = 0; i < total_size; ++i) out_indices[i] = indices[i];
 
@@ -516,12 +522,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
 
     // Construct SparseCSFTensor
     std::vector<int64_t> out_indptr_shape({total_size - nonzero_count + ndim - 1});
-    std::shared_ptr<Tensor> out_indptr_tensor =
-        std::make_shared<Tensor>(int64(), out_indptr_buffer, out_indptr_shape);
-
     std::vector<int64_t> out_indices_shape({total_size});
-    std::shared_ptr<Tensor> out_indices_tensor =
-        std::make_shared<Tensor>(int64(), out_indices_buffer, out_indices_shape);
 
     std::vector<int64_t> indptr_offsets(ndim - 1);
     std::vector<int64_t> indices_offsets(ndim);
@@ -534,9 +535,11 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
     for (int64_t i = 0; i < ndim; ++i)
       indices_offsets[i + 1] = indices_offsets[i] + counts[i];
 
-    sparse_index =
-        std::make_shared<SparseCSFIndex>(out_indptr_tensor, out_indices_tensor,
-                                         indptr_offsets, indices_offsets, axis_order);
+    sparse_index = std::make_shared<SparseCSFIndex>(
+        std::make_shared<Tensor>(index_value_type_, out_indptr_buffer, out_indptr_shape),
+        std::make_shared<Tensor>(index_value_type_, out_indices_buffer,
+                                 out_indices_shape),
+        indptr_offsets, indices_offsets, axis_order);
     data = sparse_coo_tensor->data();
 
     return Status::OK();
@@ -565,6 +568,22 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
   using BaseClass::index_value_type_;
   using BaseClass::pool_;
   using BaseClass::tensor_;
+
+  template <typename c_value_type>
+  inline Status CheckMaximumValue(const c_value_type type_max) const {
+    auto max_dimension =
+        *std::max_element(tensor_.shape().begin(), tensor_.shape().end());
+    if (static_cast<int64_t>(type_max) < max_dimension) {
+      // LCOV_EXCL_START: The following invalid causes program failure.
+      return Status::Invalid("The bit width of the index value type is too small");
+      // LCOV_EXCL_STOP
+    }
+    return Status::OK();
+  }
+
+  inline Status CheckMaximumValue(const int64_t) const { return Status::OK(); }
+
+  inline Status CheckMaximumValue(const uint64_t) const { return Status::OK(); }
 };
 
 // ----------------------------------------------------------------------
@@ -955,6 +974,22 @@ void CheckSparseCSXIndexValidity(const std::shared_ptr<DataType>& indptr_type,
 // ----------------------------------------------------------------------
 // SparseCSFIndex
 
+Status SparseCSFIndex::Make(const std::shared_ptr<DataType> indices_type,
+                            const std::vector<int64_t>& indptr_shape,
+                            const std::vector<int64_t>& indices_shape,
+                            const std::vector<int64_t>& indptr_offsets,
+                            const std::vector<int64_t>& indices_offsets,
+                            const std::vector<int64_t>& axis_order,
+                            std::shared_ptr<Buffer> indptr_data,
+                            std::shared_ptr<Buffer> indices_data,
+                            std::shared_ptr<SparseCSFIndex>* out) {
+  *out = std::make_shared<SparseCSFIndex>(
+      std::make_shared<Tensor>(indices_type, indptr_data, indptr_shape),
+      std::make_shared<Tensor>(indices_type, indices_data, indices_shape), indptr_offsets,
+      indices_offsets, axis_order);
+  return Status::OK();
+}
+
 // Constructor with two index vectors
 SparseCSFIndex::SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
                                const std::shared_ptr<Tensor>& indices,
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index a8e38b89ad2..c3c36ba8402 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -349,6 +349,17 @@ class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIn
  public:
   static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSF;
 
+  /// \brief Make SparseCSFIndex from raw properties
+  static Status Make(const std::shared_ptr<DataType> indices_type,
+                     const std::vector<int64_t>& indptr_shape,
+                     const std::vector<int64_t>& indices_shape,
+                     const std::vector<int64_t>& indptr_offsets,
+                     const std::vector<int64_t>& indices_offsets,
+                     const std::vector<int64_t>& axis_order,
+                     std::shared_ptr<Buffer> indptr_data,
+                     std::shared_ptr<Buffer> indices_data,
+                     std::shared_ptr<SparseCSFIndex>* out);
+
   /// \brief Construct SparseCSFIndex from two index vectors
   explicit SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
                           const std::shared_ptr<Tensor>& indices,
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index 5496df8b003..170cc2cc29c 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -912,30 +912,37 @@ TEST_F(TestSparseCSCMatrix, TestToTensor) {
 
 template <typename IndexValueType>
 class TestSparseCSFTensorBase : public ::testing::Test {
-public:
-    void SetUp() {
-        shape_ = {6, 4};
-        dim_names_ = {"foo", "bar"};
-
-        // Dense representation:
-        // [
-        //    1  0  2  0
-        //    0  3  0  4
-        //    5  0  6  0
-        //    0 11  0 12
-        //   13  0 14  0
-        //    0 15  0 16
-        // ]
-        std::vector<int64_t> dense_values = {1, 0,  2, 0,  0,  3, 0,  4, 5, 0,  6, 0,
-                                             0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
-        auto dense_data = Buffer::Wrap(dense_values);
-        NumericTensor<Int64Type> dense_tensor(dense_data, shape_, {}, dim_names_);
-    }
-
-protected:
-    std::vector<int64_t> shape_;
-    std::vector<std::string> dim_names_;
-    std::shared_ptr<SparseCSFTensor> sparse_tensor_from_dense_;
+ public:
+  void SetUp() {
+    shape_ = {3, 3, 3, 4};
+    dim_names_ = {"a", "b", "c", "d"};
+
+    // COO representation:
+    //   X[1, 1, 1, 2] := 1
+    //   X[1, 1, 1, 3] := 2
+    //   X[1, 2, 1, 1] := 3
+    //   X[1, 2, 1, 3] := 4
+    //   X[1, 2, 2, 1] := 5
+    //   X[2, 2, 2, 1] := 6
+    //   X[2, 2, 2, 2] := 7
+    //   X[2, 2, 2, 3] := 8
+
+    std::vector<int64_t> dense_values = {
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
+    auto dense_data = Buffer::Wrap(dense_values);
+    NumericTensor<Int64Type> dense_tensor(dense_data, shape_, {}, dim_names_);
+    ASSERT_OK(SparseCSFTensor::Make(dense_tensor,
+                                    TypeTraits<IndexValueType>::type_singleton(),
+                                    &sparse_tensor_from_dense_));
+  }
+
+ protected:
+  std::vector<int64_t> shape_;
+  std::vector<std::string> dim_names_;
+  std::shared_ptr<SparseCSFTensor> sparse_tensor_from_dense_;
 };
 
 class TestSparseCSFTensor : public TestSparseCSFTensorBase<Int64Type> {};
@@ -957,15 +964,12 @@ TEST_F(TestSparseCSFTensor, TestToTensor) {
   std::shared_ptr<Buffer> indptr_buffer = Buffer::Wrap(indptr_values);
   std::shared_ptr<Buffer> indices_buffer = Buffer::Wrap(indices_values);
 
-  std::shared_ptr<Tensor> indptr =
-      std::make_shared<Tensor>(int64(), indptr_buffer, indptr_shape);
-  std::shared_ptr<Tensor> indices =
-      std::make_shared<Tensor>(int64(), indices_buffer, indices_shape);
-
-  std::shared_ptr<SparseCSFIndex> sparse_index = std::make_shared<SparseCSFIndex>(
-      indptr, indices, indptr_offsets, indices_offsets, axis_order);
+  std::shared_ptr<SparseCSFIndex> si;
+  ASSERT_OK(SparseCSFIndex::Make(int64(), indptr_shape, indices_shape, indptr_offsets,
+                                 indices_offsets, axis_order, indptr_buffer,
+                                 indices_buffer, &si));
   std::shared_ptr<SparseCSFTensor> sparse_tensor = std::make_shared<SparseCSFTensor>(
-      sparse_index, int64(), data_buffer, sparse_tensor_shape, dim_names);
+      si, int64(), data_buffer, sparse_tensor_shape, dim_names);
 
   ASSERT_EQ(8, sparse_tensor->non_zero_length());
 

From a322ff5b26280a2263f1dda594e353a5434348b5 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Mon, 25 Nov 2019 18:47:31 +0100
Subject: [PATCH 06/18] Adding tests for multiple index value types for
 SparseCSFIndex.

---
 cpp/src/arrow/sparse_tensor.cc      |   3 +-
 cpp/src/arrow/sparse_tensor_test.cc | 106 ++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index e67fbfb3ebb..c18358b4388 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -477,7 +477,8 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
           if (row > 1) tree_split = true;
 
           indices[column * nonzero_count + counts[column]] =
-              coords->Value<IndexValueType>({row, column});
+              static_cast<c_index_value_type>(
+                  coords->Value<IndexValueType>({row, column}));
           indptr[column * (nonzero_count + 1) + counts[column]] =
               static_cast<c_index_value_type>(counts[column + 1]);
           ++counts[column];
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index 170cc2cc29c..103fc86a7ee 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -1014,4 +1014,110 @@ TEST_F(TestSparseCSFTensor, CreationFromTensor) {
   ASSERT_OK(st->ToTensor(&dt));
   ASSERT_TRUE(tensor.Equals(*dt));
 }
+
+template <typename IndexValueType>
+class TestSparseCSFTensorForIndexValueType
+    : public TestSparseCSFTensorBase<IndexValueType> {
+ protected:
+  std::shared_ptr<SparseCSFIndex> MakeSparseCSFIndex(
+      std::vector<typename IndexValueType::c_type>& indptr_values,
+      std::vector<typename IndexValueType::c_type>& indices_values,
+      const std::vector<int64_t>& indptr_offsets,
+      const std::vector<int64_t>& indices_offsets,
+      const std::vector<int64_t>& indptr_shape, const std::vector<int64_t>& indices_shape,
+      const std::vector<int64_t>& axis_order) const {
+    auto indptr_data = Buffer::Wrap(indptr_values);
+    auto indices_data = Buffer::Wrap(indices_values);
+    auto indptr =
+        std::make_shared<NumericTensor<IndexValueType>>(indptr_data, indptr_shape);
+    auto indices =
+        std::make_shared<NumericTensor<IndexValueType>>(indices_data, indices_shape);
+    return std::make_shared<SparseCSFIndex>(indptr, indices, indptr_offsets,
+                                            indices_offsets, axis_order);
+  }
+
+  template <typename CValueType>
+  std::shared_ptr<SparseCSFTensor> MakeSparseTensor(
+      const std::shared_ptr<SparseCSFIndex>& si,
+      std::vector<CValueType>& sparse_values) const {
+    auto data = Buffer::Wrap(sparse_values);
+    return std::make_shared<SparseCSFTensor>(si,
+                                             CTypeTraits<CValueType>::type_singleton(),
+                                             data, this->shape_, this->dim_names_);
+  }
+};
+
+TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType);
+
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, ToTensor) {
+  using IndexValueType = TypeParam;
+  using c_index_value_type = typename IndexValueType::c_type;
+
+  std::vector<int64_t> data_values = {1, 2, 3, 4, 5, 6, 7, 8};
+  std::vector<c_index_value_type> indptr_values = {0, 2, 3, 0, 1, 3, 4, 0, 2, 4, 5, 8};
+  std::vector<c_index_value_type> indices_values = {1, 2, 1, 2, 2, 1, 1, 2, 2,
+                                                    2, 3, 1, 3, 1, 1, 2, 3};
+  std::vector<int64_t> indices_offsets = {0, 2, 5, 9};
+  std::vector<int64_t> indptr_offsets = {0, 3, 7};
+  std::vector<int64_t> axis_order = {0, 1, 2, 3};
+  std::vector<int64_t> sparse_tensor_shape({3, 3, 3, 4});
+  std::vector<int64_t> indptr_shape({12});
+  std::vector<int64_t> indices_shape({17});
+  std::vector<std::string> dim_names({"a", "b", "c", "d"});
+
+  std::vector<int64_t> dense_values = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
+
+  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
+  std::shared_ptr<Buffer> indptr_buffer = Buffer::Wrap(indptr_values);
+  std::shared_ptr<Buffer> indices_buffer = Buffer::Wrap(indices_values);
+  std::shared_ptr<Buffer> dense_data = Buffer::Wrap(dense_values);
+
+  std::shared_ptr<SparseCSFIndex> si =
+      this->MakeSparseCSFIndex(indptr_values, indices_values, indptr_offsets,
+                               indices_offsets, indptr_shape, indices_shape, axis_order);
+  std::shared_ptr<SparseCSFTensor> st = this->MakeSparseTensor(si, data_values);
+
+  ASSERT_EQ(8, st->non_zero_length());
+
+  std::shared_ptr<Tensor> dt;
+  ASSERT_OK(st->ToTensor(&dt));
+  Tensor tensor(int64(), dense_data, sparse_tensor_shape, {});
+  ASSERT_TRUE(tensor.Equals(*dt));
+
+  std::shared_ptr<SparseCSFIndex> si2 =
+      arrow::internal::checked_pointer_cast<SparseCSFIndex>(
+          this->sparse_tensor_from_dense_->sparse_index());
+
+  ASSERT_EQ(si->indices()->type(), si2->indices()->type());
+  ASSERT_TRUE(si->indptr()->Equals(*si2->indptr()));
+  ASSERT_TRUE(si->indices()->Equals(*si2->indices()));
+  ASSERT_TRUE(si->indptr_offsets() == si2->indptr_offsets());
+  ASSERT_TRUE(si->indices_offsets() == si2->indices_offsets());
+  ASSERT_TRUE(si->indices_offsets() == si2->indices_offsets());
+  ASSERT_TRUE(si->axis_order() == si2->axis_order());
+
+  ASSERT_TRUE(si->Equals(*si2));
+  ASSERT_TRUE(st->data()->Equals(*this->sparse_tensor_from_dense_->data()));
+  //  ASSERT_TRUE(st->Equals(*this->sparse_tensor_from_dense_));
+}
+
+REGISTER_TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType, ToTensor);
+
+INSTANTIATE_TYPED_TEST_CASE_P(TestInt8, TestSparseCSFTensorForIndexValueType, Int8Type);
+INSTANTIATE_TYPED_TEST_CASE_P(TestUInt8, TestSparseCSFTensorForIndexValueType, UInt8Type);
+// INSTANTIATE_TYPED_TEST_CASE_P(TestInt16, TestSparseCSFTensorForIndexValueType,
+// Int16Type); INSTANTIATE_TYPED_TEST_CASE_P(TestUInt16,
+// TestSparseCSFTensorForIndexValueType,UInt16Type);
+// INSTANTIATE_TYPED_TEST_CASE_P(TestInt32, TestSparseCSFTensorForIndexValueType,
+// Int32Type);
+INSTANTIATE_TYPED_TEST_CASE_P(TestUInt32, TestSparseCSFTensorForIndexValueType,
+                              UInt32Type);
+INSTANTIATE_TYPED_TEST_CASE_P(TestInt64, TestSparseCSFTensorForIndexValueType, Int64Type);
+INSTANTIATE_TYPED_TEST_CASE_P(TestUInt64, TestSparseCSFTensorForIndexValueType,
+                              UInt64Type);
+
 }  // namespace arrow

From eb519471d72977e7ad35f1467daa1308e67ea655 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Sun, 8 Dec 2019 16:21:06 +0100
Subject: [PATCH 07/18] Switching SparseCSFIndex to '2D' data structure.

---
 cpp/src/arrow/compare.cc            |  10 ++
 cpp/src/arrow/sparse_tensor.cc      | 196 +++++++++++++---------------
 cpp/src/arrow/sparse_tensor.h       |  67 +++++-----
 cpp/src/arrow/sparse_tensor_test.cc | 158 ++++++++++------------
 format/SparseTensor.fbs             |  38 ++----
 5 files changed, 213 insertions(+), 256 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index f7431f80f5f..6e521a32c03 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -1194,6 +1194,12 @@ inline bool SparseTensorEqualsImplDispatch(const SparseTensorImpl<SparseIndexTyp
           checked_cast<const SparseTensorImpl<SparseCSCIndex>&>(right);
       return SparseTensorEqualsImpl<SparseIndexType, SparseCSCIndex>::Compare(left,
                                                                               right_csc);
+
+    case SparseTensorFormat::CSF: {
+      const auto& right_csf =
+          checked_cast<const SparseTensorImpl<SparseCSFIndex>&>(right);
+      return SparseTensorEqualsImpl<SparseIndexType, SparseCSFIndex>::Compare(left,
+                                                                              right_csf);
     }
 
     default:
@@ -1230,6 +1236,10 @@ bool SparseTensorEquals(const SparseTensor& left, const SparseTensor& right) {
     case SparseTensorFormat::CSC: {
       const auto& left_csc = checked_cast<const SparseTensorImpl<SparseCSCIndex>&>(left);
       return SparseTensorEqualsImplDispatch(left_csc, right);
+
+    case SparseTensorFormat::CSF: {
+      const auto& left_csf = checked_cast<const SparseTensorImpl<SparseCSFIndex>&>(left);
+      return SparseTensorEqualsImplDispatch(left_csf, right);
     }
 
     default:
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index c18358b4388..c917523a95a 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -23,6 +23,7 @@
 #include <memory>
 #include <numeric>
 
+#include "arrow/buffer_builder.h"
 #include "arrow/compare.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
@@ -439,10 +440,9 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
   Status Convert() {
     using c_index_value_type = typename IndexValueType::c_type;
     RETURN_NOT_OK(CheckMaximumValue(std::numeric_limits<c_index_value_type>::max()));
-    const int64_t indices_elsize = sizeof(c_index_value_type);
 
     std::shared_ptr<SparseCOOTensor> sparse_coo_tensor;
-    RETURN_NOT_OK(SparseCOOTensor::Make(tensor_, &sparse_coo_tensor));
+    ARROW_ASSIGN_OR_RAISE(sparse_coo_tensor, SparseCOOTensor::Make(tensor_));
     std::shared_ptr<Tensor> coords =
         arrow::internal::checked_pointer_cast<SparseCOOIndex>(
             sparse_coo_tensor->sparse_index())
@@ -458,14 +458,8 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
     std::vector<int64_t> axis_order(ndim);
     for (int64_t i = 0; i < ndim; ++i) axis_order[i] = i;
 
-    std::shared_ptr<Buffer> indices_buffer;
-    std::shared_ptr<Buffer> indptr_buffer;
-    RETURN_NOT_OK(
-        AllocateBuffer(pool_, indices_elsize * ndim * nonzero_count, &indices_buffer));
-    RETURN_NOT_OK(AllocateBuffer(pool_, indices_elsize * (ndim - 1) * (nonzero_count + 1),
-                                 &indptr_buffer));
-    auto* indices = reinterpret_cast<c_index_value_type*>(indices_buffer->mutable_data());
-    auto* indptr = reinterpret_cast<c_index_value_type*>(indptr_buffer->mutable_data());
+    std::vector<TypedBufferBuilder<c_index_value_type>> indptr_buffer_builders(ndim - 1);
+    std::vector<TypedBufferBuilder<c_index_value_type>> indices_buffer_builders(ndim);
 
     for (int64_t row = 0; row < nonzero_count; ++row) {
       bool tree_split = false;
@@ -476,73 +470,37 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
         if (tree_split || change || row == 0) {
           if (row > 1) tree_split = true;
 
-          indices[column * nonzero_count + counts[column]] =
-              static_cast<c_index_value_type>(
-                  coords->Value<IndexValueType>({row, column}));
-          indptr[column * (nonzero_count + 1) + counts[column]] =
-              static_cast<c_index_value_type>(counts[column + 1]);
+          if (column < ndim - 1)
+            RETURN_NOT_OK(indptr_buffer_builders[column].Append(
+                static_cast<c_index_value_type>(counts[column + 1])));
+          RETURN_NOT_OK(
+              indices_buffer_builders[column].Append(static_cast<c_index_value_type>(
+                  coords->Value<IndexValueType>({row, column}))));
           ++counts[column];
         }
       }
     }
-
-    for (int64_t column = 0; column < ndim; ++column) {
-      indptr[column * (nonzero_count + 1) + counts[column]] =
-          static_cast<c_index_value_type>(counts[column + 1]);
+    for (int64_t column = 0; column < ndim - 1; ++column) {
+      RETURN_NOT_OK(indptr_buffer_builders[column].Append(
+          static_cast<c_index_value_type>(counts[column + 1])));
     }
 
-    // Remove gaps from buffers
-    int64_t total_size = counts[0];
-    for (int64_t column = 1; column < ndim; ++column) {
-      for (int64_t i = 0; i < counts[column] + 1; ++i) {
-        if (column < ndim - 1)
-          indptr[total_size + column + i] = indptr[column * (nonzero_count + 1) + i];
-        if (i < counts[column])
-          indices[total_size + i] = indices[column * nonzero_count + i];
-      }
-      total_size += counts[column];
-    }
+    std::vector<std::shared_ptr<Buffer>> indptr_buffers(ndim - 1);
+    std::vector<std::shared_ptr<Buffer>> indices_buffers(ndim);
+    std::vector<int64_t> indptr_shapes(counts.begin(), counts.end() - 1);
+    std::vector<int64_t> indices_shapes = counts;
 
-    // Copy CSF index data into smaller buffers
-    std::shared_ptr<Buffer> out_indices_buffer;
-    std::shared_ptr<Buffer> out_indptr_buffer;
-    RETURN_NOT_OK(
-        AllocateBuffer(pool_, indices_elsize * total_size, &out_indices_buffer));
-    RETURN_NOT_OK(AllocateBuffer(pool_,
-                                 indices_elsize * total_size - nonzero_count + ndim - 1,
-                                 &out_indptr_buffer));
-    auto* out_indices =
-        reinterpret_cast<c_index_value_type*>(out_indices_buffer->mutable_data());
-    auto* out_indptr =
-        reinterpret_cast<c_index_value_type*>(out_indptr_buffer->mutable_data());
-
-    for (int64_t i = 0; i < total_size; ++i) out_indices[i] = indices[i];
-
-    for (int64_t i = 0; i < total_size - nonzero_count + ndim - 1; ++i)
-      out_indptr[i] = indptr[i];
-
-    // Construct SparseCSFTensor
-    std::vector<int64_t> out_indptr_shape({total_size - nonzero_count + ndim - 1});
-    std::vector<int64_t> out_indices_shape({total_size});
-
-    std::vector<int64_t> indptr_offsets(ndim - 1);
-    std::vector<int64_t> indices_offsets(ndim);
-    std::fill_n(indptr_offsets.begin(), ndim - 1, static_cast<int64_t>(0));
-    std::fill_n(indices_offsets.begin(), ndim, static_cast<int64_t>(0));
-
-    for (int64_t i = 0; i < ndim - 2; ++i)
-      indptr_offsets[i + 1] = indptr_offsets[i] + counts[i] + 1;
-
-    for (int64_t i = 0; i < ndim; ++i)
-      indices_offsets[i + 1] = indices_offsets[i] + counts[i];
-
-    sparse_index = std::make_shared<SparseCSFIndex>(
-        std::make_shared<Tensor>(index_value_type_, out_indptr_buffer, out_indptr_shape),
-        std::make_shared<Tensor>(index_value_type_, out_indices_buffer,
-                                 out_indices_shape),
-        indptr_offsets, indices_offsets, axis_order);
-    data = sparse_coo_tensor->data();
+    for (int64_t column = 0; column < ndim; ++column)
+      RETURN_NOT_OK(
+          indices_buffer_builders[column].Finish(&indices_buffers[column], true));
+
+    for (int64_t column = 0; column < ndim - 1; ++column)
+      RETURN_NOT_OK(indptr_buffer_builders[column].Finish(&indptr_buffers[column], true));
 
+    ARROW_ASSIGN_OR_RAISE(
+        sparse_index, SparseCSFIndex::Make(index_value_type_, indices_shapes, axis_order,
+                                           indptr_buffers, indices_buffers));
+    data = sparse_coo_tensor->data();
     return Status::OK();
   }
 
@@ -686,23 +644,19 @@ void assign_values(int64_t dimension_index, int64_t offset, int64_t first_ptr,
                    const int64_t* raw_data, const std::vector<int64_t> strides,
                    const std::vector<int64_t> axis_order, TYPE* out) {
   auto dimension = axis_order[dimension_index];
-  auto indices_offset = sparse_index->indices_offsets()[dimension];
-  auto indptr_offset = sparse_index->indptr_offsets()[dimension];
-  int64_t ndim = sparse_index->indices_offsets().size();
-
-  if (dimension == 0 && ndim > 1)
-    last_ptr = sparse_index->indptr_offsets()[dimension + 1] - 1;
+  int64_t ndim = axis_order.size();
+  if (dimension == 0 && ndim > 1) last_ptr = sparse_index->indptr()[0]->size() - 1;
 
   for (int64_t i = first_ptr; i < last_ptr; ++i) {
     int64_t tmp_offset =
-        offset + sparse_index->indices()->Value<IndexValueType>({indices_offset + i}) *
+        offset + sparse_index->indices()[dimension]->Value<IndexValueType>({i}) *
                      strides[dimension];
     if (dimension_index < ndim - 1)
       assign_values<TYPE, IndexValueType>(
           dimension + 1, tmp_offset,
-          sparse_index->indptr()->Value<IndexValueType>({indptr_offset + i}),
-          sparse_index->indptr()->Value<IndexValueType>({indptr_offset + i + 1}),
-          sparse_index, raw_data, strides, axis_order, out);
+          sparse_index->indptr()[dimension]->Value<IndexValueType>({i}),
+          sparse_index->indptr()[dimension]->Value<IndexValueType>({i + 1}), sparse_index,
+          raw_data, strides, axis_order, out);
     else
       out[tmp_offset] = static_cast<TYPE>(raw_data[i]);
   }
@@ -840,8 +794,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
     case SparseTensorFormat::CSF: {
       const auto& sparse_index =
           internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index());
-      const std::shared_ptr<const Tensor> indices = sparse_index.indices();
-      type = indices->type();
+      const std::vector<std::shared_ptr<Tensor>> indices = sparse_index.indices();
+      type = indices[0]->type();
       break;
     }
       // LCOV_EXCL_START: ignore program failure
@@ -975,40 +929,68 @@ void CheckSparseCSXIndexValidity(const std::shared_ptr<DataType>& indptr_type,
 // ----------------------------------------------------------------------
 // SparseCSFIndex
 
-Status SparseCSFIndex::Make(const std::shared_ptr<DataType> indices_type,
-                            const std::vector<int64_t>& indptr_shape,
-                            const std::vector<int64_t>& indices_shape,
-                            const std::vector<int64_t>& indptr_offsets,
-                            const std::vector<int64_t>& indices_offsets,
-                            const std::vector<int64_t>& axis_order,
-                            std::shared_ptr<Buffer> indptr_data,
-                            std::shared_ptr<Buffer> indices_data,
-                            std::shared_ptr<SparseCSFIndex>* out) {
-  *out = std::make_shared<SparseCSFIndex>(
-      std::make_shared<Tensor>(indices_type, indptr_data, indptr_shape),
-      std::make_shared<Tensor>(indices_type, indices_data, indices_shape), indptr_offsets,
-      indices_offsets, axis_order);
+namespace {
+
+inline Status CheckSparseCSFIndexValidity(const std::shared_ptr<DataType>& indptr_type,
+                                          const std::shared_ptr<DataType>& indices_type,
+                                          const int64_t num_indptrs,
+                                          const int64_t num_indices,
+                                          const std::vector<int64_t>& indptr_shape,
+                                          const std::vector<int64_t>& indices_shape,
+                                          const int64_t axis_order_size) {
+  if (!is_integer(indptr_type->id())) {
+    return Status::Invalid("Type of SparseCSFIndex indptr must be integer");
+  }
+  if (!is_integer(indices_type->id())) {
+    return Status::Invalid("Type of SparseCSFIndex indices must be integer");
+  }
+  if (num_indptrs + 1 != num_indices) {
+    return Status::Invalid(
+        "SparseCSFIndex length indices must be equal to length inptrs plus one.");
+  }
+  if (axis_order_size != num_indices) {
+    return Status::Invalid(
+        "SparseCSFIndex length of indices must be equal number of dimensions.");
+  }
   return Status::OK();
 }
 
+}  // namespace
+
+Result<std::shared_ptr<SparseCSFIndex>> SparseCSFIndex::Make(
+    const std::shared_ptr<DataType>& indptr_type,
+    const std::shared_ptr<DataType>& indices_type,
+    const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
+    std::vector<std::shared_ptr<Buffer>> indptr_data,
+    std::vector<std::shared_ptr<Buffer>> indices_data) {
+  int64_t ndim = axis_order.size();
+  std::vector<std::shared_ptr<Tensor>> indptr(ndim - 1);
+  std::vector<std::shared_ptr<Tensor>> indices(ndim);
+
+  for (int64_t i = 0; i < ndim - 1; ++i)
+    indptr[i] = std::make_shared<Tensor>(indptr_type, indptr_data[i],
+                                         std::vector<int64_t>({indices_shapes[i] + 1}));
+
+  for (int64_t i = 0; i < ndim; ++i)
+    indices[i] = std::make_shared<Tensor>(indices_type, indices_data[i],
+                                          std::vector<int64_t>({indices_shapes[i]}));
+
+  return std::make_shared<SparseCSFIndex>(indptr, indices, axis_order);
+}
+
 // Constructor with two index vectors
-SparseCSFIndex::SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
-                               const std::shared_ptr<Tensor>& indices,
-                               const std::vector<int64_t>& indptr_offsets,
-                               const std::vector<int64_t>& indices_offsets,
+SparseCSFIndex::SparseCSFIndex(std::vector<std::shared_ptr<Tensor>>& indptr,
+                               std::vector<std::shared_ptr<Tensor>>& indices,
                                const std::vector<int64_t>& axis_order)
-    : SparseIndexBase(indices->size() - indices_offsets.back()),
+    : SparseIndexBase(indices.back()->shape()[0]),
       indptr_(indptr),
       indices_(indices),
-      indptr_offsets_(indptr_offsets),
-      indices_offsets_(indices_offsets),
       axis_order_(axis_order) {
-  ARROW_CHECK(is_integer(indptr_->type_id()));
-  ARROW_CHECK_EQ(1, indptr_->ndim());
-  ARROW_CHECK(is_integer(indices_->type_id()));
-  ARROW_CHECK_EQ(1, indices_->ndim());
-  ARROW_CHECK_EQ(indptr_offsets_.size() + 1, indices_offsets_.size());
-  ARROW_CHECK_EQ(axis_order_.size(), indices_offsets_.size());
+  ARROW_CHECK(CheckSparseCSFIndexValidity(indptr_.front()->type(),
+                                          indices_.front()->type(), indptr_.size(),
+                                          indices_.size(), indptr_.back()->shape(),
+                                          indices_.back()->shape(), axis_order_.size())
+                  .ok());
 }
 
 std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); }
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index c3c36ba8402..b75c42204f0 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -338,46 +338,44 @@ class ARROW_EXPORT SparseCSCIndex
 ///
 /// A CSF sparse index manages the location of its non-zero values by set of
 /// prefix trees. Each path from a root to leaf forms one tensor non-zero index.
-/// CSF is implemented with five vectors.
+/// CSF is implemented with three vectors.
 ///
-/// Vectors indptr and indices are split into N-1 segments (by indptr_offsets) and
-/// N segments (by indices_offsetsy, where N is the number of dimensions.
-/// Indptr and indices segments describe the set of prefix trees.
-///
-/// Trees traverse dimensions in order given by axis_order.
+/// Vectors indptr and indices contain N-1 and N buffers respectively, where N is the
+/// number of dimensions. Axis_order is a vector of integers of length N. Indptr and
+/// indices describe the set of prefix trees. Trees traverse dimensions in order given by
+/// axis_order.
 class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIndex> {
  public:
   static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSF;
 
   /// \brief Make SparseCSFIndex from raw properties
-  static Status Make(const std::shared_ptr<DataType> indices_type,
-                     const std::vector<int64_t>& indptr_shape,
-                     const std::vector<int64_t>& indices_shape,
-                     const std::vector<int64_t>& indptr_offsets,
-                     const std::vector<int64_t>& indices_offsets,
-                     const std::vector<int64_t>& axis_order,
-                     std::shared_ptr<Buffer> indptr_data,
-                     std::shared_ptr<Buffer> indices_data,
-                     std::shared_ptr<SparseCSFIndex>* out);
+  static Result<std::shared_ptr<SparseCSFIndex>> Make(
+      const std::shared_ptr<DataType>& indptr_type,
+      const std::shared_ptr<DataType>& indices_type,
+      const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
+      std::vector<std::shared_ptr<Buffer>> indptr_data,
+      std::vector<std::shared_ptr<Buffer>> indices_data);
+
+  /// \brief Make SparseCSFIndex from raw properties
+  static Result<std::shared_ptr<SparseCSFIndex>> Make(
+      const std::shared_ptr<DataType>& indices_type,
+      const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
+      std::vector<std::shared_ptr<Buffer>> indptr_data,
+      std::vector<std::shared_ptr<Buffer>> indices_data) {
+    return Make(indices_type, indices_type, indices_shapes, axis_order, indptr_data,
+                indices_data);
+  }
 
   /// \brief Construct SparseCSFIndex from two index vectors
-  explicit SparseCSFIndex(const std::shared_ptr<Tensor>& indptr,
-                          const std::shared_ptr<Tensor>& indices,
-                          const std::vector<int64_t>& indptr_offsets,
-                          const std::vector<int64_t>& indices_offsets,
+  explicit SparseCSFIndex(std::vector<std::shared_ptr<Tensor>>& indptr,
+                          std::vector<std::shared_ptr<Tensor>>& indices,
                           const std::vector<int64_t>& axis_order);
 
   /// \brief Return a 1D tensor of indptr vector
-  const std::shared_ptr<Tensor>& indptr() const { return indptr_; }
+  const std::vector<std::shared_ptr<Tensor>>& indptr() const { return indptr_; }
 
   /// \brief Return a 1D tensor of indices vector
-  const std::shared_ptr<Tensor>& indices() const { return indices_; }
-
-  /// \brief Return a 1D vector of indptr offsets
-  const std::vector<int64_t>& indptr_offsets() const { return indptr_offsets_; }
-
-  /// \brief Return a vector of indices offsets
-  const std::vector<int64_t>& indices_offsets() const { return indices_offsets_; }
+  const std::vector<std::shared_ptr<Tensor>>& indices() const { return indices_; }
 
   /// \brief Return a 1D vector specifying the order of axes
   const std::vector<int64_t>& axis_order() const { return axis_order_; }
@@ -387,17 +385,16 @@ class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIn
 
   /// \brief Return whether the CSF indices are equal
   bool Equals(const SparseCSFIndex& other) const {
-    return indptr()->Equals(*other.indptr()) && indices()->Equals(*other.indices()) &&
-           indptr_offsets() == other.indptr_offsets() &&
-           indices_offsets() == other.indices_offsets() &&
-           axis_order() == other.axis_order();
+    for (int64_t i = 0; i < static_cast<int64_t>(indices().size()); ++i)
+      if (!indices()[i]->Equals(*other.indices()[i])) return false;
+    for (int64_t i = 0; i < static_cast<int64_t>(indptr().size()); ++i)
+      if (!indptr()[i]->Equals(*other.indptr()[i])) return false;
+    return axis_order() == other.axis_order();
   }
 
  protected:
-  std::shared_ptr<Tensor> indptr_;
-  std::shared_ptr<Tensor> indices_;
-  std::vector<int64_t> indptr_offsets_;
-  std::vector<int64_t> indices_offsets_;
+  std::vector<std::shared_ptr<Tensor>> indptr_;
+  std::vector<std::shared_ptr<Tensor>> indices_;
   std::vector<int64_t> axis_order_;
 };
 
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index 103fc86a7ee..314f1fea213 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -934,9 +934,9 @@ class TestSparseCSFTensorBase : public ::testing::Test {
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
     auto dense_data = Buffer::Wrap(dense_values);
     NumericTensor<Int64Type> dense_tensor(dense_data, shape_, {}, dim_names_);
-    ASSERT_OK(SparseCSFTensor::Make(dense_tensor,
-                                    TypeTraits<IndexValueType>::type_singleton(),
-                                    &sparse_tensor_from_dense_));
+    ASSERT_OK_AND_ASSIGN(sparse_tensor_from_dense_,
+                         SparseCSFTensor::Make(
+                             dense_tensor, TypeTraits<IndexValueType>::type_singleton()));
   }
 
  protected:
@@ -947,46 +947,6 @@ class TestSparseCSFTensorBase : public ::testing::Test {
 
 class TestSparseCSFTensor : public TestSparseCSFTensorBase<Int64Type> {};
 
-TEST_F(TestSparseCSFTensor, TestToTensor) {
-  std::vector<int64_t> data_values = {1, 2, 3, 4, 5, 6, 7, 8};
-  std::vector<int64_t> indptr_values = {0, 2, 3, 0, 1, 3, 4, 0, 2, 4, 5, 8};
-  std::vector<int64_t> indices_values = {1, 2, 1, 2, 2, 1, 1, 2, 2,
-                                         2, 3, 1, 3, 1, 1, 2, 3};
-  std::vector<int64_t> indices_offsets = {0, 2, 5, 9};
-  std::vector<int64_t> indptr_offsets = {0, 3, 7};
-  std::vector<int64_t> axis_order = {0, 1, 2, 3};
-  std::vector<int64_t> sparse_tensor_shape({3, 3, 3, 4});
-  std::vector<int64_t> indptr_shape({12});
-  std::vector<int64_t> indices_shape({17});
-  std::vector<std::string> dim_names({"a", "b", "c", "d"});
-
-  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
-  std::shared_ptr<Buffer> indptr_buffer = Buffer::Wrap(indptr_values);
-  std::shared_ptr<Buffer> indices_buffer = Buffer::Wrap(indices_values);
-
-  std::shared_ptr<SparseCSFIndex> si;
-  ASSERT_OK(SparseCSFIndex::Make(int64(), indptr_shape, indices_shape, indptr_offsets,
-                                 indices_offsets, axis_order, indptr_buffer,
-                                 indices_buffer, &si));
-  std::shared_ptr<SparseCSFTensor> sparse_tensor = std::make_shared<SparseCSFTensor>(
-      si, int64(), data_buffer, sparse_tensor_shape, dim_names);
-
-  ASSERT_EQ(8, sparse_tensor->non_zero_length());
-
-  std::shared_ptr<Tensor> dense_tensor;
-  ASSERT_OK(sparse_tensor->ToTensor(&dense_tensor));
-
-  std::vector<int64_t> dense_values = {
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
-  auto dense_data = Buffer::Wrap(dense_values);
-  Tensor tensor(int64(), dense_data, sparse_tensor_shape, {});
-
-  ASSERT_TRUE(tensor.Equals(*dense_tensor));
-}
-
 TEST_F(TestSparseCSFTensor, CreationFromTensor) {
   std::vector<int64_t> values = {
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -999,7 +959,7 @@ TEST_F(TestSparseCSFTensor, CreationFromTensor) {
   Tensor tensor(int64(), buffer, shape, {}, dim_names);
 
   std::shared_ptr<SparseCSFTensor> st;
-  ASSERT_OK(SparseCSFTensor::Make(tensor, &st));
+  ASSERT_OK_AND_ASSIGN(st, SparseCSFTensor::Make(tensor));
 
   ASSERT_EQ(8, st->non_zero_length());
   ASSERT_TRUE(st->is_mutable());
@@ -1049,71 +1009,91 @@ class TestSparseCSFTensorForIndexValueType
 
 TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType);
 
-TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, ToTensor) {
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorFromTensor) {
+  using IndexValueType = TypeParam;
+  using c_index_value_type = typename IndexValueType::c_type;
+
+  std::vector<int64_t> data_values = {1, 2, 3, 4, 5, 6, 7, 8};
+  std::vector<std::vector<c_index_value_type>> indptr_values = {
+      {0, 2, 3}, {0, 1, 3, 4}, {0, 2, 4, 5, 8}};
+  std::vector<std::vector<c_index_value_type>> indices_values = {
+      {1, 2}, {1, 2, 2}, {1, 1, 2, 2}, {2, 3, 1, 3, 1, 1, 2, 3}};
+  std::vector<std::shared_ptr<Buffer>> indptr_buffers(3);
+  std::vector<std::shared_ptr<Buffer>> indices_buffers(4);
+  std::vector<int64_t> axis_order = {0, 1, 2, 3};
+  std::vector<int64_t> sparse_tensor_shape({3, 3, 3, 4});
+  std::vector<int64_t> indices_shapes({2, 3, 4, 8});
+  std::vector<std::string> dim_names({"a", "b", "c", "d"});
+
+  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
+  for (int64_t i = 0; i < static_cast<int64_t>(indptr_values.size()); ++i)
+    indptr_buffers[i] = Buffer::Wrap(indptr_values[i]);
+  for (int64_t i = 0; i < static_cast<int64_t>(indices_values.size()); ++i)
+    indices_buffers[i] = Buffer::Wrap(indices_values[i]);
+
+  std::shared_ptr<SparseCSFIndex> sparse_index;
+  ASSERT_OK_AND_ASSIGN(
+      sparse_index,
+      SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes,
+                           axis_order, indptr_buffers, indices_buffers));
+  std::shared_ptr<SparseCSFTensor> sparse_tensor = std::make_shared<SparseCSFTensor>(
+      sparse_index, int64(), data_buffer, sparse_tensor_shape, dim_names);
+
+  ASSERT_TRUE(sparse_tensor->Equals(*this->sparse_tensor_from_dense_));
+}
+
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorToTensor) {
   using IndexValueType = TypeParam;
   using c_index_value_type = typename IndexValueType::c_type;
 
   std::vector<int64_t> data_values = {1, 2, 3, 4, 5, 6, 7, 8};
-  std::vector<c_index_value_type> indptr_values = {0, 2, 3, 0, 1, 3, 4, 0, 2, 4, 5, 8};
-  std::vector<c_index_value_type> indices_values = {1, 2, 1, 2, 2, 1, 1, 2, 2,
-                                                    2, 3, 1, 3, 1, 1, 2, 3};
-  std::vector<int64_t> indices_offsets = {0, 2, 5, 9};
-  std::vector<int64_t> indptr_offsets = {0, 3, 7};
+  std::vector<std::vector<c_index_value_type>> indptr_values = {
+      {0, 2, 3}, {0, 1, 3, 4}, {0, 2, 4, 5, 8}};
+  std::vector<std::vector<c_index_value_type>> indices_values = {
+      {1, 2}, {1, 2, 2}, {1, 1, 2, 2}, {2, 3, 1, 3, 1, 1, 2, 3}};
+  std::vector<std::shared_ptr<Buffer>> indptr_buffers(3);
+  std::vector<std::shared_ptr<Buffer>> indices_buffers(4);
   std::vector<int64_t> axis_order = {0, 1, 2, 3};
   std::vector<int64_t> sparse_tensor_shape({3, 3, 3, 4});
-  std::vector<int64_t> indptr_shape({12});
-  std::vector<int64_t> indices_shape({17});
+  std::vector<int64_t> indices_shapes({2, 3, 4, 8});
   std::vector<std::string> dim_names({"a", "b", "c", "d"});
 
+  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
+  for (int64_t i = 0; i < static_cast<int64_t>(indptr_values.size()); ++i)
+    indptr_buffers[i] = Buffer::Wrap(indptr_values[i]);
+  for (int64_t i = 0; i < static_cast<int64_t>(indices_values.size()); ++i)
+    indices_buffers[i] = Buffer::Wrap(indices_values[i]);
+
   std::vector<int64_t> dense_values = {
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
-
-  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
-  std::shared_ptr<Buffer> indptr_buffer = Buffer::Wrap(indptr_values);
-  std::shared_ptr<Buffer> indices_buffer = Buffer::Wrap(indices_values);
-  std::shared_ptr<Buffer> dense_data = Buffer::Wrap(dense_values);
-
-  std::shared_ptr<SparseCSFIndex> si =
-      this->MakeSparseCSFIndex(indptr_values, indices_values, indptr_offsets,
-                               indices_offsets, indptr_shape, indices_shape, axis_order);
-  std::shared_ptr<SparseCSFTensor> st = this->MakeSparseTensor(si, data_values);
-
-  ASSERT_EQ(8, st->non_zero_length());
-
-  std::shared_ptr<Tensor> dt;
-  ASSERT_OK(st->ToTensor(&dt));
+  auto dense_data = Buffer::Wrap(dense_values);
   Tensor tensor(int64(), dense_data, sparse_tensor_shape, {});
-  ASSERT_TRUE(tensor.Equals(*dt));
 
-  std::shared_ptr<SparseCSFIndex> si2 =
-      arrow::internal::checked_pointer_cast<SparseCSFIndex>(
-          this->sparse_tensor_from_dense_->sparse_index());
-
-  ASSERT_EQ(si->indices()->type(), si2->indices()->type());
-  ASSERT_TRUE(si->indptr()->Equals(*si2->indptr()));
-  ASSERT_TRUE(si->indices()->Equals(*si2->indices()));
-  ASSERT_TRUE(si->indptr_offsets() == si2->indptr_offsets());
-  ASSERT_TRUE(si->indices_offsets() == si2->indices_offsets());
-  ASSERT_TRUE(si->indices_offsets() == si2->indices_offsets());
-  ASSERT_TRUE(si->axis_order() == si2->axis_order());
-
-  ASSERT_TRUE(si->Equals(*si2));
-  ASSERT_TRUE(st->data()->Equals(*this->sparse_tensor_from_dense_->data()));
-  //  ASSERT_TRUE(st->Equals(*this->sparse_tensor_from_dense_));
+  std::shared_ptr<SparseCSFIndex> sparse_index;
+  ASSERT_OK_AND_ASSIGN(
+      sparse_index,
+      SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes,
+                           axis_order, indptr_buffers, indices_buffers));
+  std::shared_ptr<SparseCSFTensor> sparse_tensor = std::make_shared<SparseCSFTensor>(
+      sparse_index, int64(), data_buffer, sparse_tensor_shape, dim_names);
+
+  std::shared_ptr<Tensor> dense_tensor;
+  ASSERT_OK(sparse_tensor->ToTensor(&dense_tensor));
+  ASSERT_TRUE(tensor.Equals(*dense_tensor));
 }
 
-REGISTER_TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType, ToTensor);
+REGISTER_TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType,
+                           TestSparseTensorFromTensor, TestSparseTensorToTensor);
 
 INSTANTIATE_TYPED_TEST_CASE_P(TestInt8, TestSparseCSFTensorForIndexValueType, Int8Type);
 INSTANTIATE_TYPED_TEST_CASE_P(TestUInt8, TestSparseCSFTensorForIndexValueType, UInt8Type);
-// INSTANTIATE_TYPED_TEST_CASE_P(TestInt16, TestSparseCSFTensorForIndexValueType,
-// Int16Type); INSTANTIATE_TYPED_TEST_CASE_P(TestUInt16,
-// TestSparseCSFTensorForIndexValueType,UInt16Type);
-// INSTANTIATE_TYPED_TEST_CASE_P(TestInt32, TestSparseCSFTensorForIndexValueType,
-// Int32Type);
+INSTANTIATE_TYPED_TEST_CASE_P(TestInt16, TestSparseCSFTensorForIndexValueType, Int16Type);
+INSTANTIATE_TYPED_TEST_CASE_P(TestUInt16, TestSparseCSFTensorForIndexValueType,
+                              UInt16Type);
+INSTANTIATE_TYPED_TEST_CASE_P(TestInt32, TestSparseCSFTensorForIndexValueType, Int32Type);
 INSTANTIATE_TYPED_TEST_CASE_P(TestUInt32, TestSparseCSFTensorForIndexValueType,
                               UInt32Type);
 INSTANTIATE_TYPED_TEST_CASE_P(TestInt64, TestSparseCSFTensorForIndexValueType, Int64Type);
diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index 12e9f870376..56acdfc01e5 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -145,41 +145,29 @@ table SparseTensorIndexCSF {
   ///    / \ / \   \    /|\
   ///   2  3 1  3   1  1 2 3
 
-  /// The type of values in indptrBuffer
+  /// The type of values in indptrBuffers
   indptrType: Int;
 
-  /// indptrBuffer stores the sparsity structure.
-  /// For example, the indptrBuffer for the above X is:
-  ///
-  ///   indptrBuffer(X) = [0, 2, 3, 0, 1, 3, 4, 0, 2, 4, 5, 8].
-  ///
-  indptrBuffer: Buffer;
-
-  /// indptrOffsets stores per dimension offset in indptrBuffer.
-  /// For example, the indptrOffsets for the above X is:
+  /// indptrBuffers stores the sparsity structure.
+  /// Position in the indptrBuffers vector signifies the dimension.
+  /// For example, the indptrBuffers for the above X is:
   ///
-  ///   indptrOffsets(X) = [0, 3, 7].
+  ///   indptrBuffers(X) = [[0, 2, 3], [0, 1, 3, 4], [0, 2, 4, 5, 8]].
   ///
-  indptrOffsets: [int];
+  indptrBuffers: [Buffer];
 
-  /// The type of values in indicesBuffer
+  /// The type of values in indicesBuffers
   indicesType: Int;
 
-  /// indicesBuffer stores the label of each node,
-  /// For example, the indicesBuffer for the above X is:
-  ///
-  ///   indicesBuffer(X) = [1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 3, 1, 3, 1, 1, 2, 3].
-  ///
-  indicesBuffer: Buffer;
-
-  /// indicesOffsets stores per dimension offset in indicesOffsets.
-  /// For example, the indicesBuffer for the above X is:
+  /// indicesBuffers stores the label of each node.
+  /// Position in the indicesBuffers vector signifies the dimension.
+  /// For example, the indicesBuffers for the above X is:
   ///
-  ///   indicesOffsets(X) = [0, 2, 5, 9].
+  ///   indicesBuffers(X) = [[1, 2], [1, 2, 2], [1, 1, 2, 2], [2, 3, 1, 3, 1, 1, 2, 3]].
   ///
-  indicesOffsets: [int];
+  indicesBuffers: [Buffer];
 
-  /// axisOrder stores the sequence in which dimensions were traversed.
+  /// axisOrder stores the sequence in which dimensions were traversed to produce the prefix tree.
   /// For example, the axisOrder for the above X is:
   ///
   ///   axisOrder(X) = [0, 1, 2, 3].

From bd0d8c2f80608ec56b0f7fedb74c80236ac989be Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Fri, 13 Dec 2019 17:37:10 +0100
Subject: [PATCH 08/18] Dense to sparse CSF conversion now in order of
 dimension size.

---
 cpp/src/arrow/sparse_tensor.cc | 12 ++++++------
 format/SparseTensor.fbs        |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index c917523a95a..9c265f97e8a 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -17,6 +17,7 @@
 
 #include "arrow/sparse_tensor.h"
 
+#include <arrow/util/sort.h>
 #include <algorithm>
 #include <functional>
 #include <limits>
@@ -454,9 +455,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
 
     std::vector<int64_t> counts(ndim);
     std::fill_n(counts.begin(), ndim, static_cast<int64_t>(0));
-
-    std::vector<int64_t> axis_order(ndim);
-    for (int64_t i = 0; i < ndim; ++i) axis_order[i] = i;
+    std::vector<int64_t> axis_order = internal::ArgSort(tensor_.shape());
 
     std::vector<TypedBufferBuilder<c_index_value_type>> indptr_buffer_builders(ndim - 1);
     std::vector<TypedBufferBuilder<c_index_value_type>> indices_buffer_builders(ndim);
@@ -464,8 +463,9 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
     for (int64_t row = 0; row < nonzero_count; ++row) {
       bool tree_split = false;
       for (int64_t column = 0; column < ndim; ++column) {
-        bool change = coords->Value<IndexValueType>({row, column}) !=
-                      coords->Value<IndexValueType>({row - 1, column});
+        int64_t dimension = axis_order[column];
+        bool change = coords->Value<IndexValueType>({row, dimension}) !=
+                      coords->Value<IndexValueType>({row - 1, dimension});
 
         if (tree_split || change || row == 0) {
           if (row > 1) tree_split = true;
@@ -475,7 +475,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
                 static_cast<c_index_value_type>(counts[column + 1])));
           RETURN_NOT_OK(
               indices_buffer_builders[column].Append(static_cast<c_index_value_type>(
-                  coords->Value<IndexValueType>({row, column}))));
+                  coords->Value<IndexValueType>({row, dimension}))));
           ++counts[column];
         }
       }
diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index 56acdfc01e5..e3e8df11d44 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -172,7 +172,7 @@ table SparseTensorIndexCSF {
   ///
   ///   axisOrder(X) = [0, 1, 2, 3].
   ///
-  axisOrder: [long];
+  axisOrder: [Int];
 }
 
 union SparseTensorIndex {

From 6ceb406b6486e8f91c694a2102a8a851a62b1aa7 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Tue, 17 Dec 2019 04:03:21 +0100
Subject: [PATCH 09/18] Implementing review feedback.

---
 cpp/src/arrow/sparse_tensor.cc | 25 +++++++++++++++++++------
 cpp/src/arrow/sparse_tensor.h  |  1 +
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 9c265f97e8a..2a4f5e792e4 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -442,6 +442,13 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
     using c_index_value_type = typename IndexValueType::c_type;
     RETURN_NOT_OK(CheckMaximumValue(std::numeric_limits<c_index_value_type>::max()));
 
+    const int64_t ndim = tensor_.ndim();
+    if (ndim < 2) {
+      // LCOV_EXCL_START: The following invalid causes program failure.
+      return Status::Invalid("Invalid tensor dimension");
+      // LCOV_EXCL_STOP
+    }
+
     std::shared_ptr<SparseCOOTensor> sparse_coo_tensor;
     ARROW_ASSIGN_OR_RAISE(sparse_coo_tensor, SparseCOOTensor::Make(tensor_));
     std::shared_ptr<Tensor> coords =
@@ -449,8 +456,10 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
             sparse_coo_tensor->sparse_index())
             ->indices();
 
+    // TODO(rok): Coords should be sorted with axis_order priority to improve compression.
+    // ARROW-4221 would help here as well.
+
     // Convert SparseCOOTensor to long CSF buffers
-    const int64_t ndim = tensor_.ndim();
     const int64_t nonzero_count = sparse_coo_tensor->non_zero_length();
 
     std::vector<int64_t> counts(ndim);
@@ -939,18 +948,18 @@ inline Status CheckSparseCSFIndexValidity(const std::shared_ptr<DataType>& indpt
                                           const std::vector<int64_t>& indices_shape,
                                           const int64_t axis_order_size) {
   if (!is_integer(indptr_type->id())) {
-    return Status::Invalid("Type of SparseCSFIndex indptr must be integer");
+    return Status::TypeError("Type of SparseCSFIndex indptr must be integer");
   }
   if (!is_integer(indices_type->id())) {
-    return Status::Invalid("Type of SparseCSFIndex indices must be integer");
+    return Status::TypeError("Type of SparseCSFIndex indices must be integer");
   }
   if (num_indptrs + 1 != num_indices) {
     return Status::Invalid(
-        "SparseCSFIndex length indices must be equal to length inptrs plus one.");
+        "Length of indices must be equal to length of inptrs + 1 for SparseCSFIndex.");
   }
   if (axis_order_size != num_indices) {
     return Status::Invalid(
-        "SparseCSFIndex length of indices must be equal number of dimensions.");
+        "Length of indices must be equal number of dimensions for SparseCSFIndex.");
   }
   return Status::OK();
 }
@@ -970,11 +979,15 @@ Result<std::shared_ptr<SparseCSFIndex>> SparseCSFIndex::Make(
   for (int64_t i = 0; i < ndim - 1; ++i)
     indptr[i] = std::make_shared<Tensor>(indptr_type, indptr_data[i],
                                          std::vector<int64_t>({indices_shapes[i] + 1}));
-
   for (int64_t i = 0; i < ndim; ++i)
     indices[i] = std::make_shared<Tensor>(indices_type, indices_data[i],
                                           std::vector<int64_t>({indices_shapes[i]}));
 
+  ARROW_CHECK(CheckSparseCSFIndexValidity(indptr_type, indices_type, indptr.size(),
+                                          indices.size(), indptr.back()->shape(),
+                                          indices.back()->shape(), axis_order.size())
+                  .ok());
+
   return std::make_shared<SparseCSFIndex>(indptr, indices, axis_order);
 }
 
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index b75c42204f0..64e730b78d3 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -347,6 +347,7 @@ class ARROW_EXPORT SparseCSCIndex
 class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIndex> {
  public:
   static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSF;
+  static constexpr char const* kTypeName = "SparseCSFIndex";
 
   /// \brief Make SparseCSFIndex from raw properties
   static Result<std::shared_ptr<SparseCSFIndex>> Make(

From 4f2bf00ddc5b2feaf5810df51d39157859f30193 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Tue, 17 Dec 2019 22:06:50 +0100
Subject: [PATCH 10/18] Work on CSF index tests.

---
 cpp/src/arrow/sparse_tensor.cc      | 26 +++++----
 cpp/src/arrow/sparse_tensor_test.cc | 91 ++++++++++++++++-------------
 2 files changed, 67 insertions(+), 50 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 2a4f5e792e4..404cff5a841 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -443,6 +443,8 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
     RETURN_NOT_OK(CheckMaximumValue(std::numeric_limits<c_index_value_type>::max()));
 
     const int64_t ndim = tensor_.ndim();
+    std::vector<int64_t> axis_order = internal::ArgSort(tensor_.shape());
+
     if (ndim < 2) {
       // LCOV_EXCL_START: The following invalid causes program failure.
       return Status::Invalid("Invalid tensor dimension");
@@ -464,8 +466,6 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
 
     std::vector<int64_t> counts(ndim);
     std::fill_n(counts.begin(), ndim, static_cast<int64_t>(0));
-    std::vector<int64_t> axis_order = internal::ArgSort(tensor_.shape());
-
     std::vector<TypedBufferBuilder<c_index_value_type>> indptr_buffer_builders(ndim - 1);
     std::vector<TypedBufferBuilder<c_index_value_type>> indices_buffer_builders(ndim);
 
@@ -477,7 +477,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
                       coords->Value<IndexValueType>({row - 1, dimension});
 
         if (tree_split || change || row == 0) {
-          if (row > 1) tree_split = true;
+          if (row > 1 || change) tree_split = true;
 
           if (column < ndim - 1)
             RETURN_NOT_OK(indptr_buffer_builders[column].Append(
@@ -648,19 +648,18 @@ Status MakeSparseTensorFromTensor(const Tensor& tensor,
 }
 
 template <typename TYPE, typename IndexValueType>
-void assign_values(int64_t dimension_index, int64_t offset, int64_t first_ptr,
-                   int64_t last_ptr, const SparseCSFIndex* sparse_index,
-                   const int64_t* raw_data, const std::vector<int64_t> strides,
+void assign_values(int64_t dimension, int64_t offset, int64_t first_ptr, int64_t last_ptr,
+                   const SparseCSFIndex* sparse_index, const int64_t* raw_data,
+                   const std::vector<int64_t> strides,
                    const std::vector<int64_t> axis_order, TYPE* out) {
-  auto dimension = axis_order[dimension_index];
   int64_t ndim = axis_order.size();
-  if (dimension == 0 && ndim > 1) last_ptr = sparse_index->indptr()[0]->size() - 1;
 
   for (int64_t i = first_ptr; i < last_ptr; ++i) {
     int64_t tmp_offset =
         offset + sparse_index->indices()[dimension]->Value<IndexValueType>({i}) *
-                     strides[dimension];
-    if (dimension_index < ndim - 1)
+                     strides[axis_order[dimension]];
+
+    if (dimension < ndim - 1)
       assign_values<TYPE, IndexValueType>(
           dimension + 1, tmp_offset,
           sparse_index->indptr()[dimension]->Value<IndexValueType>({i}),
@@ -756,8 +755,13 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
     case SparseTensorFormat::CSF: {
       const auto& sparse_index =
           internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index());
+      int64_t last_ptr_index = sparse_index.indptr()[0]->size() - 1;
+      int64_t first_ptr = sparse_index.indptr()[0]->Value<IndexValueType>({0});
+      int64_t last_ptr =
+          sparse_index.indptr()[0]->Value<IndexValueType>({last_ptr_index});
+
       assign_values<value_type, IndexValueType>(
-          0, 0, 0, 0, &sparse_index,
+          0, 0, first_ptr, last_ptr, &sparse_index,
           reinterpret_cast<const int64_t*>(sparse_tensor->raw_data()), strides,
           sparse_index.axis_order(), values);
       *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index 314f1fea213..a5723aa2adb 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -914,24 +914,15 @@ template <typename IndexValueType>
 class TestSparseCSFTensorBase : public ::testing::Test {
  public:
   void SetUp() {
-    shape_ = {3, 3, 3, 4};
+    shape_ = {4, 3, 5, 2};
     dim_names_ = {"a", "b", "c", "d"};
 
-    // COO representation:
-    //   X[1, 1, 1, 2] := 1
-    //   X[1, 1, 1, 3] := 2
-    //   X[1, 2, 1, 1] := 3
-    //   X[1, 2, 1, 3] := 4
-    //   X[1, 2, 2, 1] := 5
-    //   X[2, 2, 2, 1] := 6
-    //   X[2, 2, 2, 2] := 7
-    //   X[2, 2, 2, 3] := 8
-
     std::vector<int64_t> dense_values = {
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
+        0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 7, 0, 8};
     auto dense_data = Buffer::Wrap(dense_values);
     NumericTensor<Int64Type> dense_tensor(dense_data, shape_, {}, dim_names_);
     ASSERT_OK_AND_ASSIGN(sparse_tensor_from_dense_,
@@ -949,11 +940,12 @@ class TestSparseCSFTensor : public TestSparseCSFTensorBase<Int64Type> {};
 
 TEST_F(TestSparseCSFTensor, CreationFromTensor) {
   std::vector<int64_t> values = {
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
-  std::vector<int64_t> shape({3, 3, 3, 4});
+      0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 7, 0, 8};
+  std::vector<int64_t> shape({4, 3, 5, 2});
   std::vector<std::string> dim_names({"a", "b", "c", "d"});
   std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
   Tensor tensor(int64(), buffer, shape, {}, dim_names);
@@ -961,8 +953,29 @@ TEST_F(TestSparseCSFTensor, CreationFromTensor) {
   std::shared_ptr<SparseCSFTensor> st;
   ASSERT_OK_AND_ASSIGN(st, SparseCSFTensor::Make(tensor));
 
+  std::vector<std::vector<int64_t>> indptr_values = {
+      {0, 1, 4, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 5, 6, 8}};
+  std::vector<std::vector<int64_t>> indices_values = {
+      {1, 0, 1}, {0, 0, 1, 0, 1, 2}, {0, 0, 0, 1, 3, 3}, {0, 1, 0, 0, 3, 4, 3, 4}};
+  std::vector<std::shared_ptr<Buffer>> indptr_buffers(3);
+  std::vector<std::shared_ptr<Buffer>> indices_buffers(4);
+  std::vector<int64_t> axis_order = {3, 1, 0, 2};
+  std::vector<int64_t> indices_shapes = {3, 6, 6, 8};
+
+  for (int64_t i = 0; i < static_cast<int64_t>(indptr_values.size()); ++i)
+    indptr_buffers[i] = Buffer::Wrap(indptr_values[i]);
+  for (int64_t i = 0; i < static_cast<int64_t>(indices_values.size()); ++i)
+    indices_buffers[i] = Buffer::Wrap(indices_values[i]);
+
+  std::shared_ptr<SparseCSFIndex> sparse_index;
+  ASSERT_OK_AND_ASSIGN(sparse_index,
+                       SparseCSFIndex::Make(tensor.type(), indices_shapes, axis_order,
+                                            indptr_buffers, indices_buffers));
+
+  const auto& si = internal::checked_cast<const SparseCSFIndex&>(*st->sparse_index());
   ASSERT_EQ(8, st->non_zero_length());
   ASSERT_TRUE(st->is_mutable());
+  ASSERT_TRUE(si.Equals(*sparse_index));
 
   ASSERT_EQ(dim_names, st->dim_names());
   ASSERT_EQ("a", st->dim_name(0));
@@ -1015,14 +1028,14 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorFromTensor) {
 
   std::vector<int64_t> data_values = {1, 2, 3, 4, 5, 6, 7, 8};
   std::vector<std::vector<c_index_value_type>> indptr_values = {
-      {0, 2, 3}, {0, 1, 3, 4}, {0, 2, 4, 5, 8}};
+      {0, 1, 4, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 5, 6, 8}};
   std::vector<std::vector<c_index_value_type>> indices_values = {
-      {1, 2}, {1, 2, 2}, {1, 1, 2, 2}, {2, 3, 1, 3, 1, 1, 2, 3}};
+      {1, 0, 1}, {0, 0, 1, 0, 1, 2}, {0, 0, 0, 1, 3, 3}, {0, 1, 0, 0, 3, 4, 3, 4}};
   std::vector<std::shared_ptr<Buffer>> indptr_buffers(3);
   std::vector<std::shared_ptr<Buffer>> indices_buffers(4);
-  std::vector<int64_t> axis_order = {0, 1, 2, 3};
-  std::vector<int64_t> sparse_tensor_shape({3, 3, 3, 4});
-  std::vector<int64_t> indices_shapes({2, 3, 4, 8});
+  std::vector<int64_t> axis_order = {3, 1, 0, 2};
+  std::vector<int64_t> sparse_tensor_shape({4, 3, 5, 2});
+  std::vector<int64_t> indices_shapes = {3, 6, 6, 8};
   std::vector<std::string> dim_names({"a", "b", "c", "d"});
 
   std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
@@ -1048,14 +1061,14 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorToTensor) {
 
   std::vector<int64_t> data_values = {1, 2, 3, 4, 5, 6, 7, 8};
   std::vector<std::vector<c_index_value_type>> indptr_values = {
-      {0, 2, 3}, {0, 1, 3, 4}, {0, 2, 4, 5, 8}};
+      {0, 1, 4, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 5, 6, 8}};
   std::vector<std::vector<c_index_value_type>> indices_values = {
-      {1, 2}, {1, 2, 2}, {1, 1, 2, 2}, {2, 3, 1, 3, 1, 1, 2, 3}};
+      {1, 0, 1}, {0, 0, 1, 0, 1, 2}, {0, 0, 0, 1, 3, 3}, {0, 1, 0, 0, 3, 4, 3, 4}};
   std::vector<std::shared_ptr<Buffer>> indptr_buffers(3);
   std::vector<std::shared_ptr<Buffer>> indices_buffers(4);
-  std::vector<int64_t> axis_order = {0, 1, 2, 3};
-  std::vector<int64_t> sparse_tensor_shape({3, 3, 3, 4});
-  std::vector<int64_t> indices_shapes({2, 3, 4, 8});
+  std::vector<int64_t> axis_order = {3, 1, 0, 2};
+  std::vector<int64_t> indices_shapes = {3, 6, 6, 8};
+  std::vector<int64_t> sparse_tensor_shape({4, 3, 5, 2});
   std::vector<std::string> dim_names({"a", "b", "c", "d"});
 
   std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
@@ -1065,10 +1078,11 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorToTensor) {
     indices_buffers[i] = Buffer::Wrap(indices_values[i]);
 
   std::vector<int64_t> dense_values = {
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
+      0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 7, 0, 8};
   auto dense_data = Buffer::Wrap(dense_values);
   Tensor tensor(int64(), dense_data, sparse_tensor_shape, {});
 
@@ -1078,11 +1092,11 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorToTensor) {
       SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes,
                            axis_order, indptr_buffers, indices_buffers));
   std::shared_ptr<SparseCSFTensor> sparse_tensor = std::make_shared<SparseCSFTensor>(
-      sparse_index, int64(), data_buffer, sparse_tensor_shape, dim_names);
+      sparse_index, tensor.type(), data_buffer, sparse_tensor_shape, dim_names);
 
-  std::shared_ptr<Tensor> dense_tensor;
-  ASSERT_OK(sparse_tensor->ToTensor(&dense_tensor));
-  ASSERT_TRUE(tensor.Equals(*dense_tensor));
+  //  std::shared_ptr<Tensor> dense_tensor;
+  //  ASSERT_OK(sparse_tensor->ToTensor(&dense_tensor));
+  //  ASSERT_TRUE(tensor.Equals(*dense_tensor));
 }
 
 REGISTER_TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType,
@@ -1099,5 +1113,4 @@ INSTANTIATE_TYPED_TEST_CASE_P(TestUInt32, TestSparseCSFTensorForIndexValueType,
 INSTANTIATE_TYPED_TEST_CASE_P(TestInt64, TestSparseCSFTensorForIndexValueType, Int64Type);
 INSTANTIATE_TYPED_TEST_CASE_P(TestUInt64, TestSparseCSFTensorForIndexValueType,
                               UInt64Type);
-
 }  // namespace arrow

From 24a831f3eab637260064c91f87e48b7cbd3148d2 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Wed, 18 Dec 2019 11:11:45 +0100
Subject: [PATCH 11/18] Style.

---
 cpp/src/arrow/compare.cc            | 2 ++
 cpp/src/arrow/sparse_tensor.cc      | 2 --
 cpp/src/arrow/sparse_tensor_test.cc | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 6e521a32c03..d2322009ea8 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -1194,6 +1194,7 @@ inline bool SparseTensorEqualsImplDispatch(const SparseTensorImpl<SparseIndexTyp
           checked_cast<const SparseTensorImpl<SparseCSCIndex>&>(right);
       return SparseTensorEqualsImpl<SparseIndexType, SparseCSCIndex>::Compare(left,
                                                                               right_csc);
+    }
 
     case SparseTensorFormat::CSF: {
       const auto& right_csf =
@@ -1236,6 +1237,7 @@ bool SparseTensorEquals(const SparseTensor& left, const SparseTensor& right) {
     case SparseTensorFormat::CSC: {
       const auto& left_csc = checked_cast<const SparseTensorImpl<SparseCSCIndex>&>(left);
       return SparseTensorEqualsImplDispatch(left_csc, right);
+    }
 
     case SparseTensorFormat::CSF: {
       const auto& left_csf = checked_cast<const SparseTensorImpl<SparseCSFIndex>&>(left);
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 404cff5a841..4f73f7940db 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -735,8 +735,6 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
           internal::checked_cast<const SparseCSCIndex&>(*sparse_tensor->sparse_index());
       const std::shared_ptr<const Tensor> indptr = sparse_index.indptr();
       const std::shared_ptr<const Tensor> indices = sparse_index.indices();
-      const auto raw_data =
-          reinterpret_cast<const value_type*>(sparse_tensor->raw_data());
 
       int64_t offset;
       for (int64_t j = 0; j < indptr->size() - 1; ++j) {
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index a5723aa2adb..de0a793ede9 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -933,7 +933,7 @@ class TestSparseCSFTensorBase : public ::testing::Test {
  protected:
   std::vector<int64_t> shape_;
   std::vector<std::string> dim_names_;
-  std::shared_ptr<SparseCSCMatrix> sparse_tensor_from_dense_;
+  std::shared_ptr<SparseCSFTensor> sparse_tensor_from_dense_;
 };
 
 class TestSparseCSFTensor : public TestSparseCSFTensorBase<Int64Type> {};

From d9ff47e67c3e0147c0a5b142a459cf0ade82da88 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Fri, 10 Jan 2020 00:05:00 +0100
Subject: [PATCH 12/18] Further work and implementing review feedback.

---
 cpp/src/arrow/sparse_tensor.cc      | 127 ++++++-----
 cpp/src/arrow/sparse_tensor_test.cc | 322 +++++++++++++++++++---------
 2 files changed, 290 insertions(+), 159 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 4f73f7940db..c8f08c453ff 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -444,56 +444,70 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
 
     const int64_t ndim = tensor_.ndim();
     std::vector<int64_t> axis_order = internal::ArgSort(tensor_.shape());
+    int64_t nonzero_count = -1;
+    RETURN_NOT_OK(tensor_.CountNonZero(&nonzero_count));
 
-    if (ndim < 2) {
-      // LCOV_EXCL_START: The following invalid causes program failure.
-      return Status::Invalid("Invalid tensor dimension");
-      // LCOV_EXCL_STOP
-    }
+    std::shared_ptr<Buffer> values_buffer;
+    RETURN_NOT_OK(
+        AllocateBuffer(pool_, sizeof(value_type) * nonzero_count, &values_buffer));
+    value_type* values = reinterpret_cast<value_type*>(values_buffer->mutable_data());
 
-    std::shared_ptr<SparseCOOTensor> sparse_coo_tensor;
-    ARROW_ASSIGN_OR_RAISE(sparse_coo_tensor, SparseCOOTensor::Make(tensor_));
-    std::shared_ptr<Tensor> coords =
-        arrow::internal::checked_pointer_cast<SparseCOOIndex>(
-            sparse_coo_tensor->sparse_index())
-            ->indices();
+    std::vector<int64_t> counts(ndim, 0);
+    std::vector<int64_t> coord(ndim, 0);
+    std::vector<int64_t> previous_coord(ndim, -1);
+    std::vector<TypedBufferBuilder<c_index_value_type>> indptr_buffer_builders(ndim - 1);
+    std::vector<TypedBufferBuilder<c_index_value_type>> indices_buffer_builders(ndim);
 
-    // TODO(rok): Coords should be sorted with axis_order priority to improve compression.
-    // ARROW-4221 would help here as well.
+    if (ndim <= 1) {
+      return Status::NotImplemented("TODO for ndim <= 1");
+    } else {
+      const std::vector<int64_t>& shape = tensor_.shape();
+      for (int64_t n = tensor_.size(); n > 0; n--) {
+        const value_type x = tensor_.Value(coord);
 
-    // Convert SparseCOOTensor to long CSF buffers
-    const int64_t nonzero_count = sparse_coo_tensor->non_zero_length();
+        if (tensor_.Value(coord) != 0) {
+          bool tree_split = false;
+          *values++ = x;
 
-    std::vector<int64_t> counts(ndim);
-    std::fill_n(counts.begin(), ndim, static_cast<int64_t>(0));
-    std::vector<TypedBufferBuilder<c_index_value_type>> indptr_buffer_builders(ndim - 1);
-    std::vector<TypedBufferBuilder<c_index_value_type>> indices_buffer_builders(ndim);
+          for (int64_t i = 0; i < ndim; ++i) {
+            int64_t dimension = axis_order[i];
+            bool change = coord[dimension] != previous_coord[dimension];
+
+            if (tree_split || change) {
+              if (change) tree_split = true;
+
+              if (i < ndim - 1)
+                RETURN_NOT_OK(indptr_buffer_builders[i].Append(
+                    static_cast<c_index_value_type>(counts[dimension + 1])));
+              RETURN_NOT_OK(indices_buffer_builders[i].Append(
+                  static_cast<c_index_value_type>(coord[dimension])));
+              ++counts[dimension];
+            }
+          }
+          previous_coord = coord;
+        }
 
-    for (int64_t row = 0; row < nonzero_count; ++row) {
-      bool tree_split = false;
-      for (int64_t column = 0; column < ndim; ++column) {
-        int64_t dimension = axis_order[column];
-        bool change = coords->Value<IndexValueType>({row, dimension}) !=
-                      coords->Value<IndexValueType>({row - 1, dimension});
-
-        if (tree_split || change || row == 0) {
-          if (row > 1 || change) tree_split = true;
-
-          if (column < ndim - 1)
-            RETURN_NOT_OK(indptr_buffer_builders[column].Append(
-                static_cast<c_index_value_type>(counts[column + 1])));
-          RETURN_NOT_OK(
-              indices_buffer_builders[column].Append(static_cast<c_index_value_type>(
-                  coords->Value<IndexValueType>({row, dimension}))));
-          ++counts[column];
+        // increment index
+        ++coord[ndim - 1];
+        if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) {
+          int64_t d = ndim - 1;
+          while (d > 0 && coord[d] == shape[d]) {
+            coord[d] = 0;
+            ++coord[d - 1];
+            --d;
+          }
         }
       }
     }
+
     for (int64_t column = 0; column < ndim - 1; ++column) {
       RETURN_NOT_OK(indptr_buffer_builders[column].Append(
           static_cast<c_index_value_type>(counts[column + 1])));
     }
 
+    // make results
+    data = values_buffer;
+
     std::vector<std::shared_ptr<Buffer>> indptr_buffers(ndim - 1);
     std::vector<std::shared_ptr<Buffer>> indices_buffers(ndim);
     std::vector<int64_t> indptr_shapes(counts.begin(), counts.end() - 1);
@@ -509,7 +523,6 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
     ARROW_ASSIGN_OR_RAISE(
         sparse_index, SparseCSFIndex::Make(index_value_type_, indices_shapes, axis_order,
                                            indptr_buffers, indices_buffers));
-    data = sparse_coo_tensor->data();
     return Status::OK();
   }
 
@@ -647,11 +660,14 @@ Status MakeSparseTensorFromTensor(const Tensor& tensor,
   }
 }
 
+namespace {
+
 template <typename TYPE, typename IndexValueType>
-void assign_values(int64_t dimension, int64_t offset, int64_t first_ptr, int64_t last_ptr,
-                   const SparseCSFIndex* sparse_index, const int64_t* raw_data,
-                   const std::vector<int64_t> strides,
-                   const std::vector<int64_t> axis_order, TYPE* out) {
+void ExpandSparseCSFTensorValues(int64_t dimension, int64_t offset, int64_t first_ptr,
+                                 int64_t last_ptr, const SparseCSFIndex* sparse_index,
+                                 const int64_t* raw_data,
+                                 const std::vector<int64_t> strides,
+                                 const std::vector<int64_t> axis_order, TYPE* out) {
   int64_t ndim = axis_order.size();
 
   for (int64_t i = first_ptr; i < last_ptr; ++i) {
@@ -660,7 +676,7 @@ void assign_values(int64_t dimension, int64_t offset, int64_t first_ptr, int64_t
                      strides[axis_order[dimension]];
 
     if (dimension < ndim - 1)
-      assign_values<TYPE, IndexValueType>(
+      ExpandSparseCSFTensorValues<TYPE, IndexValueType>(
           dimension + 1, tmp_offset,
           sparse_index->indptr()[dimension]->Value<IndexValueType>({i}),
           sparse_index->indptr()[dimension]->Value<IndexValueType>({i + 1}), sparse_index,
@@ -670,6 +686,8 @@ void assign_values(int64_t dimension, int64_t offset, int64_t first_ptr, int64_t
   }
 }
 
+}  // namespace
+
 template <typename TYPE, typename IndexValueType>
 Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_tensor,
                                   std::shared_ptr<Tensor>* out) {
@@ -753,13 +771,9 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
     case SparseTensorFormat::CSF: {
       const auto& sparse_index =
           internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index());
-      int64_t last_ptr_index = sparse_index.indptr()[0]->size() - 1;
-      int64_t first_ptr = sparse_index.indptr()[0]->Value<IndexValueType>({0});
-      int64_t last_ptr =
-          sparse_index.indptr()[0]->Value<IndexValueType>({last_ptr_index});
 
-      assign_values<value_type, IndexValueType>(
-          0, 0, first_ptr, last_ptr, &sparse_index,
+      ExpandSparseCSFTensorValues<value_type, IndexValueType>(
+          0, 0, 0, sparse_index.indptr()[0]->size() - 1, &sparse_index,
           reinterpret_cast<const int64_t*>(sparse_tensor->raw_data()), strides,
           sparse_index.axis_order(), values);
       *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
@@ -985,10 +999,9 @@ Result<std::shared_ptr<SparseCSFIndex>> SparseCSFIndex::Make(
     indices[i] = std::make_shared<Tensor>(indices_type, indices_data[i],
                                           std::vector<int64_t>({indices_shapes[i]}));
 
-  ARROW_CHECK(CheckSparseCSFIndexValidity(indptr_type, indices_type, indptr.size(),
-                                          indices.size(), indptr.back()->shape(),
-                                          indices.back()->shape(), axis_order.size())
-                  .ok());
+  RETURN_NOT_OK(CheckSparseCSFIndexValidity(indptr_type, indices_type, indptr.size(),
+                                            indices.size(), indptr.back()->shape(),
+                                            indices.back()->shape(), axis_order.size()));
 
   return std::make_shared<SparseCSFIndex>(indptr, indices, axis_order);
 }
@@ -997,15 +1010,13 @@ Result<std::shared_ptr<SparseCSFIndex>> SparseCSFIndex::Make(
 SparseCSFIndex::SparseCSFIndex(std::vector<std::shared_ptr<Tensor>>& indptr,
                                std::vector<std::shared_ptr<Tensor>>& indices,
                                const std::vector<int64_t>& axis_order)
-    : SparseIndexBase(indices.back()->shape()[0]),
+    : SparseIndexBase(indices.back()->size()),
       indptr_(indptr),
       indices_(indices),
       axis_order_(axis_order) {
-  ARROW_CHECK(CheckSparseCSFIndexValidity(indptr_.front()->type(),
-                                          indices_.front()->type(), indptr_.size(),
-                                          indices_.size(), indptr_.back()->shape(),
-                                          indices_.back()->shape(), axis_order_.size())
-                  .ok());
+  ARROW_CHECK_OK(CheckSparseCSFIndexValidity(
+      indptr_.front()->type(), indices_.front()->type(), indptr_.size(), indices_.size(),
+      indptr_.back()->shape(), indices_.back()->shape(), axis_order_.size()));
 }
 
 std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); }
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index de0a793ede9..6c9a64e61a1 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -26,6 +26,7 @@
 
 #include <gtest/gtest.h>
 
+#include <arrow/util/sort.h>
 #include "arrow/sparse_tensor.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
@@ -914,15 +915,24 @@ template <typename IndexValueType>
 class TestSparseCSFTensorBase : public ::testing::Test {
  public:
   void SetUp() {
-    shape_ = {4, 3, 5, 2};
+    shape_ = {3, 3, 3, 4};
     dim_names_ = {"a", "b", "c", "d"};
 
+    // COO representation:
+    //   X[1, 1, 1, 2] := 1
+    //   X[1, 1, 1, 3] := 2
+    //   X[1, 2, 1, 1] := 3
+    //   X[1, 2, 1, 3] := 4
+    //   X[1, 2, 2, 1] := 5
+    //   X[2, 2, 2, 1] := 6
+    //   X[2, 2, 2, 2] := 7
+    //   X[2, 2, 2, 3] := 8
+
     std::vector<int64_t> dense_values = {
-        0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 7, 0, 8};
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
     auto dense_data = Buffer::Wrap(dense_values);
     NumericTensor<Int64Type> dense_tensor(dense_data, shape_, {}, dim_names_);
     ASSERT_OK_AND_ASSIGN(sparse_tensor_from_dense_,
@@ -938,51 +948,63 @@ class TestSparseCSFTensorBase : public ::testing::Test {
 
 class TestSparseCSFTensor : public TestSparseCSFTensorBase<Int64Type> {};
 
-TEST_F(TestSparseCSFTensor, CreationFromTensor) {
-  std::vector<int64_t> values = {
-      0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 7, 0, 8};
-  std::vector<int64_t> shape({4, 3, 5, 2});
-  std::vector<std::string> dim_names({"a", "b", "c", "d"});
+TEST_F(TestSparseCSFTensor, CreateFromBuffers1) {
+  std::vector<std::vector<int64_t>> indptr_values = {{0, 2, 3, 5}};
+  std::vector<std::vector<int64_t>> indices_values = {{0, 1, 3}, {0, 3, 1, 3, 5}};
+  std::vector<int64_t> indices_shapes({3, 5});
+  std::vector<int64_t> axis_order = {0, 1};
+  std::vector<std::string> dim_names({"a", "b"});
+  std::vector<int64_t> data_values = {1, 3, 2, 4, 5};
+  std::vector<int64_t> values = {1, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 0,
+                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5};
+  std::vector<int64_t> shape({4, 6});
+
+  std::vector<std::shared_ptr<Buffer>> indptr_buffers;
+  std::vector<std::shared_ptr<Buffer>> indices_buffers;
+  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
   std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
-  Tensor tensor(int64(), buffer, shape, {}, dim_names);
-
-  std::shared_ptr<SparseCSFTensor> st;
-  ASSERT_OK_AND_ASSIGN(st, SparseCSFTensor::Make(tensor));
+  for (auto& indptr : indptr_values) indptr_buffers.push_back(Buffer::Wrap(indptr));
+  for (auto& indices : indices_values) indices_buffers.push_back(Buffer::Wrap(indices));
 
-  std::vector<std::vector<int64_t>> indptr_values = {
-      {0, 1, 4, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 5, 6, 8}};
-  std::vector<std::vector<int64_t>> indices_values = {
-      {1, 0, 1}, {0, 0, 1, 0, 1, 2}, {0, 0, 0, 1, 3, 3}, {0, 1, 0, 0, 3, 4, 3, 4}};
-  std::vector<std::shared_ptr<Buffer>> indptr_buffers(3);
-  std::vector<std::shared_ptr<Buffer>> indices_buffers(4);
-  std::vector<int64_t> axis_order = {3, 1, 0, 2};
-  std::vector<int64_t> indices_shapes = {3, 6, 6, 8};
-
-  for (int64_t i = 0; i < static_cast<int64_t>(indptr_values.size()); ++i)
-    indptr_buffers[i] = Buffer::Wrap(indptr_values[i]);
-  for (int64_t i = 0; i < static_cast<int64_t>(indices_values.size()); ++i)
-    indices_buffers[i] = Buffer::Wrap(indices_values[i]);
+  Tensor tensor(int64(), buffer, shape, {}, this->dim_names_);
 
   std::shared_ptr<SparseCSFIndex> sparse_index;
   ASSERT_OK_AND_ASSIGN(sparse_index,
-                       SparseCSFIndex::Make(tensor.type(), indices_shapes, axis_order,
-                                            indptr_buffers, indices_buffers));
+                       SparseCSFIndex::Make(tensor.type(), tensor.type(), indices_shapes,
+                                            axis_order, indptr_buffers, indices_buffers));
+  std::shared_ptr<SparseCSFTensor> st = std::make_shared<SparseCSFTensor>(
+      sparse_index, int64(), data_buffer, shape, dim_names);
+  std::shared_ptr<Tensor> dt;
+  ASSERT_OK(st->ToTensor(&dt));
+  ASSERT_TRUE(tensor.Equals(*dt));
+}
 
-  const auto& si = internal::checked_cast<const SparseCSFIndex&>(*st->sparse_index());
-  ASSERT_EQ(8, st->non_zero_length());
-  ASSERT_TRUE(st->is_mutable());
-  ASSERT_TRUE(si.Equals(*sparse_index));
+TEST_F(TestSparseCSFTensor, CreateFromBuffers2) {
+  std::vector<std::vector<int64_t>> indptr_values = {{0, 1, 2, 4, 5}};
+  std::vector<std::vector<int64_t>> indices_values = {{0, 1, 3, 5}, {0, 1, 0, 3, 3}};
+  std::vector<int64_t> indices_shapes({4, 5});
+  std::vector<int64_t> axis_order = {1, 0};
+  std::vector<std::string> dim_names({"a", "b"});
+  std::vector<int64_t> data_values = {1, 2, 3, 4, 5};
+  std::vector<int64_t> values = {1, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 0,
+                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5};
+  std::vector<int64_t> shape({4, 6});
+
+  std::vector<std::shared_ptr<Buffer>> indptr_buffers;
+  std::vector<std::shared_ptr<Buffer>> indices_buffers;
+  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  for (auto& indptr : indptr_values) indptr_buffers.push_back(Buffer::Wrap(indptr));
+  for (auto& indices : indices_values) indices_buffers.push_back(Buffer::Wrap(indices));
 
-  ASSERT_EQ(dim_names, st->dim_names());
-  ASSERT_EQ("a", st->dim_name(0));
-  ASSERT_EQ("b", st->dim_name(1));
-  ASSERT_EQ("c", st->dim_name(2));
-  ASSERT_EQ("d", st->dim_name(3));
+  Tensor tensor(int64(), buffer, shape, {}, this->dim_names_);
 
+  std::shared_ptr<SparseCSFIndex> sparse_index;
+  ASSERT_OK_AND_ASSIGN(sparse_index,
+                       SparseCSFIndex::Make(tensor.type(), tensor.type(), indices_shapes,
+                                            axis_order, indptr_buffers, indices_buffers));
+  std::shared_ptr<SparseCSFTensor> st = std::make_shared<SparseCSFTensor>(
+      sparse_index, int64(), data_buffer, shape, dim_names);
   std::shared_ptr<Tensor> dt;
   ASSERT_OK(st->ToTensor(&dt));
   ASSERT_TRUE(tensor.Equals(*dt));
@@ -1022,85 +1044,183 @@ class TestSparseCSFTensorForIndexValueType
 
 TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType);
 
-TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorFromTensor) {
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestBufferToSparseTensor) {
   using IndexValueType = TypeParam;
   using c_index_value_type = typename IndexValueType::c_type;
 
-  std::vector<int64_t> data_values = {1, 2, 3, 4, 5, 6, 7, 8};
   std::vector<std::vector<c_index_value_type>> indptr_values = {
-      {0, 1, 4, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 5, 6, 8}};
+      {0, 2, 3}, {0, 1, 3, 4}, {0, 2, 4, 5, 8}};
   std::vector<std::vector<c_index_value_type>> indices_values = {
-      {1, 0, 1}, {0, 0, 1, 0, 1, 2}, {0, 0, 0, 1, 3, 3}, {0, 1, 0, 0, 3, 4, 3, 4}};
-  std::vector<std::shared_ptr<Buffer>> indptr_buffers(3);
-  std::vector<std::shared_ptr<Buffer>> indices_buffers(4);
-  std::vector<int64_t> axis_order = {3, 1, 0, 2};
-  std::vector<int64_t> sparse_tensor_shape({4, 3, 5, 2});
-  std::vector<int64_t> indices_shapes = {3, 6, 6, 8};
-  std::vector<std::string> dim_names({"a", "b", "c", "d"});
-
-  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
-  for (int64_t i = 0; i < static_cast<int64_t>(indptr_values.size()); ++i)
-    indptr_buffers[i] = Buffer::Wrap(indptr_values[i]);
-  for (int64_t i = 0; i < static_cast<int64_t>(indices_values.size()); ++i)
-    indices_buffers[i] = Buffer::Wrap(indices_values[i]);
-
-  std::shared_ptr<SparseCSFIndex> sparse_index;
+      {1, 2}, {1, 2, 2}, {1, 1, 2, 2}, {2, 3, 1, 3, 1, 1, 2, 3}};
+  std::vector<int64_t> indices_shapes = {2, 3, 4, 8};
+  std::vector<int64_t> axis_order = {0, 1, 2, 3};
+  std::vector<int64_t> sparse_values = {1, 2, 3, 4, 5, 6, 7, 8};
+  std::vector<int64_t> shape = {3, 3, 3, 4};
+  std::vector<std::string> dim_names = {"a", "b", "c", "d"};
+
+  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(sparse_values);
+  std::vector<std::shared_ptr<Buffer>> indptr_buffers;
+  std::vector<std::shared_ptr<Buffer>> indices_buffers;
+  for (auto& indptr : indptr_values) indptr_buffers.push_back(Buffer::Wrap(indptr));
+  for (auto& indices : indices_values) indices_buffers.push_back(Buffer::Wrap(indices));
+
+  std::shared_ptr<SparseCSFIndex> si;
   ASSERT_OK_AND_ASSIGN(
-      sparse_index,
+      si,
       SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes,
                            axis_order, indptr_buffers, indices_buffers));
-  std::shared_ptr<SparseCSFTensor> sparse_tensor = std::make_shared<SparseCSFTensor>(
-      sparse_index, int64(), data_buffer, sparse_tensor_shape, dim_names);
+  std::shared_ptr<SparseCSFTensor> st =
+      std::make_shared<SparseCSFTensor>(si, int64(), data_buffer, shape, dim_names);
 
-  ASSERT_TRUE(sparse_tensor->Equals(*this->sparse_tensor_from_dense_));
+  ASSERT_TRUE(st->Equals(*this->sparse_tensor_from_dense_));
 }
 
-TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorToTensor) {
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestTensorToSparseTensor) {
   using IndexValueType = TypeParam;
-  using c_index_value_type = typename IndexValueType::c_type;
+  std::vector<int64_t> shape = {3, 3, 3, 4};
+  std::vector<int64_t> values = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
 
-  std::vector<int64_t> data_values = {1, 2, 3, 4, 5, 6, 7, 8};
-  std::vector<std::vector<c_index_value_type>> indptr_values = {
-      {0, 1, 4, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 5, 6, 8}};
-  std::vector<std::vector<c_index_value_type>> indices_values = {
-      {1, 0, 1}, {0, 0, 1, 0, 1, 2}, {0, 0, 0, 1, 3, 3}, {0, 1, 0, 0, 3, 4, 3, 4}};
-  std::vector<std::shared_ptr<Buffer>> indptr_buffers(3);
-  std::vector<std::shared_ptr<Buffer>> indices_buffers(4);
-  std::vector<int64_t> axis_order = {3, 1, 0, 2};
-  std::vector<int64_t> indices_shapes = {3, 6, 6, 8};
-  std::vector<int64_t> sparse_tensor_shape({4, 3, 5, 2});
-  std::vector<std::string> dim_names({"a", "b", "c", "d"});
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
+  Tensor tensor(int64(), buffer, shape, {}, this->dim_names_);
 
-  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
-  for (int64_t i = 0; i < static_cast<int64_t>(indptr_values.size()); ++i)
-    indptr_buffers[i] = Buffer::Wrap(indptr_values[i]);
-  for (int64_t i = 0; i < static_cast<int64_t>(indices_values.size()); ++i)
-    indices_buffers[i] = Buffer::Wrap(indices_values[i]);
+  std::shared_ptr<SparseCSFTensor> sparse_tensor;
+  ASSERT_OK_AND_ASSIGN(
+      sparse_tensor,
+      SparseCSFTensor::Make(tensor, TypeTraits<IndexValueType>::type_singleton()));
 
+  ASSERT_EQ(8, sparse_tensor->non_zero_length());
+  ASSERT_TRUE(sparse_tensor->is_mutable());
+  ASSERT_TRUE(sparse_tensor->Equals(*this->sparse_tensor_from_dense_));
+}
+
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorToTensor) {
+  std::vector<int64_t> shape = {3, 3, 3, 4};
   std::vector<int64_t> dense_values = {
-      0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 7, 0, 8};
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
   auto dense_data = Buffer::Wrap(dense_values);
-  Tensor tensor(int64(), dense_data, sparse_tensor_shape, {});
+  Tensor tensor(int64(), dense_data, shape, {});
 
-  std::shared_ptr<SparseCSFIndex> sparse_index;
-  ASSERT_OK_AND_ASSIGN(
-      sparse_index,
-      SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes,
-                           axis_order, indptr_buffers, indices_buffers));
-  std::shared_ptr<SparseCSFTensor> sparse_tensor = std::make_shared<SparseCSFTensor>(
-      sparse_index, tensor.type(), data_buffer, sparse_tensor_shape, dim_names);
+  std::shared_ptr<Tensor> dense_tensor;
+  ASSERT_OK(this->sparse_tensor_from_dense_->ToTensor(&dense_tensor));
+  ASSERT_TRUE(tensor.Equals(*dense_tensor));
+}
+
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, CreateFromBuffers) {
+  using IndexValueType = TypeParam;
+  using c_index_value_type = typename IndexValueType::c_type;
+
+  std::vector<std::vector<c_index_value_type>> indptr_values_1 = {
+      {0, 2, 3}, {0, 1, 3, 4}, {0, 2, 4, 5, 8}};
+  std::vector<std::vector<c_index_value_type>> indices_values_1 = {
+      {1, 2}, {1, 2, 2}, {1, 1, 2, 2}, {2, 3, 1, 3, 1, 1, 2, 3}};
+  std::vector<int64_t> indices_shapes_1 = {2, 3, 4, 8};
+  std::vector<int64_t> axis_order_1 = {0, 1, 2, 3};
+  std::vector<int64_t> sparse_values_1 = {1, 2, 3, 4, 5, 6, 7, 8};
+  std::vector<int64_t> shape_1 = {3, 3, 3, 4};
+  std::vector<std::string> dim_names_1 = {"a", "b", "c", "d"};
+
+  std::vector<std::vector<c_index_value_type>> indptr_values_2 = {
+      {0, 2, 4, 6}, {0, 1, 2, 3, 4, 6, 7}, {0, 2, 3, 4, 5, 6, 7, 8}};
+  std::vector<std::vector<c_index_value_type>> indices_values_2 = {
+      {1, 2, 3}, {1, 2, 1, 2, 1, 2}, {2, 2, 1, 2, 1, 2, 2}, {1, 2, 2, 1, 2, 1, 1, 2}};
+  std::vector<int64_t> indices_shapes_2 = {3, 6, 7, 8};
+  std::vector<int64_t> axis_order_2 = {3, 0, 1, 2};
+  std::vector<int64_t> sparse_values_2 = {3, 5, 6, 1, 7, 2, 4, 8};
+  std::vector<int64_t> shape_2 = {5, 5, 5, 4};
+  std::vector<std::string> dim_names_2 = {"d", "a", "b", "c"};
+
+  std::vector<std::shared_ptr<Buffer>> indptr_buffers_1;
+  std::vector<std::shared_ptr<Buffer>> indices_buffers_1;
+  for (auto& indptr : indptr_values_1) indptr_buffers_1.push_back(Buffer::Wrap(indptr));
+  for (auto& indices : indices_values_1)
+    indices_buffers_1.push_back(Buffer::Wrap(indices));
+
+  std::vector<std::shared_ptr<Buffer>> indptr_buffers_2;
+  std::vector<std::shared_ptr<Buffer>> indices_buffers_2;
+  for (auto& indptr : indptr_values_2) indptr_buffers_2.push_back(Buffer::Wrap(indptr));
+  for (auto& indices : indices_values_2)
+    indices_buffers_2.push_back(Buffer::Wrap(indices));
+
+  std::vector<int64_t> dense_values_1 = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
+
+  std::vector<int64_t> dense_values_2 = {
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+  std::shared_ptr<Buffer> dense_buffer_1 = Buffer::Wrap(dense_values_1);
+  std::shared_ptr<Buffer> dense_buffer_2 = Buffer::Wrap(dense_values_2);
+  Tensor tensor_1(int64(), dense_buffer_1, shape_1, {}, dim_names_1);
+  Tensor tensor_2(int64(), dense_buffer_2, shape_2, {}, dim_names_2);
+  std::shared_ptr<Buffer> sparse_buffer_1 = Buffer::Wrap(sparse_values_1);
+  std::shared_ptr<Buffer> sparse_buffer_2 = Buffer::Wrap(sparse_values_2);
+
+  std::shared_ptr<SparseCSFIndex> si_1;
+  std::shared_ptr<SparseCSFIndex> si_2;
+  std::shared_ptr<SparseCSFIndex> si_3;
 
-  //  std::shared_ptr<Tensor> dense_tensor;
-  //  ASSERT_OK(sparse_tensor->ToTensor(&dense_tensor));
-  //  ASSERT_TRUE(tensor.Equals(*dense_tensor));
+  ASSERT_OK_AND_ASSIGN(
+      si_1,
+      SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes_1,
+                           axis_order_1, indptr_buffers_1, indices_buffers_1));
+  ASSERT_OK_AND_ASSIGN(
+      si_2,
+      SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes_2,
+                           axis_order_2, indptr_buffers_2, indices_buffers_2));
+  ASSERT_OK_AND_ASSIGN(
+      si_3,
+      SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes_2,
+                           axis_order_2, indptr_buffers_2, indices_buffers_2));
+
+  std::shared_ptr<SparseCSFTensor> st_1 = std::make_shared<SparseCSFTensor>(
+      si_1, int64(), sparse_buffer_1, shape_1, dim_names_1);
+  std::shared_ptr<SparseCSFTensor> st_2 = std::make_shared<SparseCSFTensor>(
+      si_2, int64(), sparse_buffer_2, shape_1, dim_names_2);
+  std::shared_ptr<SparseCSFTensor> st_3 = std::make_shared<SparseCSFTensor>(
+      si_3, int64(), sparse_buffer_2, shape_2, dim_names_2);
+
+  std::shared_ptr<Tensor> dt_1;
+  std::shared_ptr<Tensor> dt_2;
+  std::shared_ptr<Tensor> dt_3;
+  ASSERT_OK(st_1->ToTensor(&dt_1));
+  ASSERT_OK(st_2->ToTensor(&dt_2));
+  ASSERT_OK(st_3->ToTensor(&dt_3));
+
+  ASSERT_TRUE(dt_1->Equals(*dt_2));
+  ASSERT_FALSE(dt_1->Equals(*dt_3));
+  ASSERT_TRUE(tensor_1.Equals(*dt_1));
+  ASSERT_TRUE(tensor_2.Equals(*dt_3));
 }
 
-REGISTER_TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType,
-                           TestSparseTensorFromTensor, TestSparseTensorToTensor);
+REGISTER_TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType, TestBufferToSparseTensor,
+                           TestTensorToSparseTensor, TestSparseTensorToTensor,
+                           CreateFromBuffers);
 
 INSTANTIATE_TYPED_TEST_CASE_P(TestInt8, TestSparseCSFTensorForIndexValueType, Int8Type);
 INSTANTIATE_TYPED_TEST_CASE_P(TestUInt8, TestSparseCSFTensorForIndexValueType, UInt8Type);

From 3291abc907de160919e4aa3f4ce97551c0c24fc4 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Tue, 28 Jan 2020 15:12:13 +0100
Subject: [PATCH 13/18] Marking indptrType, indptrBuffers, indicesType,
 indicesBuffers and axisOrder required.

---
 format/SparseTensor.fbs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index e3e8df11d44..e637e5a3f1c 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -146,7 +146,7 @@ table SparseTensorIndexCSF {
   ///   2  3 1  3   1  1 2 3
 
   /// The type of values in indptrBuffers
-  indptrType: Int;
+  indptrType: Int (required);
 
   /// indptrBuffers stores the sparsity structure.
   /// Position in the indptrBuffers vector signifies the dimension.
@@ -154,10 +154,10 @@ table SparseTensorIndexCSF {
   ///
   ///   indptrBuffer(X) = [[0, 2, 3], [0, 1, 3, 4], [0, 2, 4, 5, 8]].
   ///
-  indptrBuffers: [Buffer];
+  indptrBuffers: [Buffer] (required);
 
   /// The type of values in indicesBuffers
-  indicesType: Int;
+  indicesType: Int (required);
 
   /// indicesBuffers stores the label of each node.
   /// Position in the indicesBuffers vector signifies the dimension.
@@ -165,14 +165,14 @@ table SparseTensorIndexCSF {
   ///
   ///   indicesBuffer(X) = [[1, 2], [1, 2, 2], [1, 1, 2, 2], [2, 3, 1, 3, 1, 1, 2, 3]].
   ///
-  indicesBuffers: [Buffer];
+  indicesBuffers: [Buffer] (required);
 
   /// axisOrder stores the sequence in which dimensions were traversed to produce the prefix tree.
   /// For example, the axisOrder for the above X is:
   ///
   ///   axisOrder(X) = [0, 1, 2, 3].
   ///
-  axisOrder: [Int];
+  axisOrder: [Int] (required);
 }
 
 union SparseTensorIndex {

From 28d38cb5e356be429f6521a88f579592feb07628 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Tue, 28 Jan 2020 15:22:45 +0100
Subject: [PATCH 14/18] Removing backslashes from comments.

---
 format/SparseTensor.fbs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index e637e5a3f1c..7ed302141ab 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -142,8 +142,8 @@ table SparseTensorIndexCSF {
   ///       1   2        2
   ///      /   / \       |
   ///     1   1   2      2
-  ///    / \ / \   \    /|\
-  ///   2  3 1  3   1  1 2 3
+  ///    /|  /|   |    /| |
+  ///   2 3 1 3   1   1 2 3
 
   /// The type of values in indptrBuffers
   indptrType: Int (required);

From 6f4f4a8f99961662de1cb369982be21c8fc00326 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Thu, 30 Jan 2020 17:11:36 +0100
Subject: [PATCH 15/18] Implementing review feedback.

---
 cpp/src/arrow/sparse_tensor.cc      |  62 +++--
 cpp/src/arrow/sparse_tensor_test.cc | 404 +++++++++++-----------------
 format/SparseTensor.fbs             |  64 +----
 3 files changed, 196 insertions(+), 334 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index c8f08c453ff..83e07baeb5a 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -17,7 +17,6 @@
 
 #include "arrow/sparse_tensor.h"
 
-#include <arrow/util/sort.h>
 #include <algorithm>
 #include <functional>
 #include <limits>
@@ -28,6 +27,7 @@
 #include "arrow/compare.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
+#include "arrow/util/sort.h"
 #include "arrow/visitor_inline.h"
 
 namespace arrow {
@@ -130,7 +130,6 @@ class SparseTensorConverter<TYPE, SparseCOOIndex>
             *indices++ = static_cast<c_index_value_type>(coord[i]);
           }
         }
-
         // increment index
         ++coord[ndim - 1];
         if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) {
@@ -443,6 +442,8 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
     RETURN_NOT_OK(CheckMaximumValue(std::numeric_limits<c_index_value_type>::max()));
 
     const int64_t ndim = tensor_.ndim();
+    // Axis order as ascending order of dimension size is a good heuristic but is not
+    // necessarily optimal.
     std::vector<int64_t> axis_order = internal::ArgSort(tensor_.shape());
     int64_t nonzero_count = -1;
     RETURN_NOT_OK(tensor_.CountNonZero(&nonzero_count));
@@ -465,7 +466,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
       for (int64_t n = tensor_.size(); n > 0; n--) {
         const value_type x = tensor_.Value(coord);
 
-        if (tensor_.Value(coord) != 0) {
+        if (x != 0) {
           bool tree_split = false;
           *values++ = x;
 
@@ -476,24 +477,25 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
             if (tree_split || change) {
               if (change) tree_split = true;
 
-              if (i < ndim - 1)
+              if (i < ndim - 1) {
                 RETURN_NOT_OK(indptr_buffer_builders[i].Append(
-                    static_cast<c_index_value_type>(counts[dimension + 1])));
+                    static_cast<c_index_value_type>(counts[i + 1])));
+              }
               RETURN_NOT_OK(indices_buffer_builders[i].Append(
                   static_cast<c_index_value_type>(coord[dimension])));
-              ++counts[dimension];
+              ++counts[i];
             }
           }
           previous_coord = coord;
         }
-
         // increment index
-        ++coord[ndim - 1];
-        if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) {
+        int64_t last_axis = axis_order[ndim - 1];
+        ++coord[last_axis];
+        if (n > 1 && coord[last_axis] == shape[last_axis]) {
           int64_t d = ndim - 1;
-          while (d > 0 && coord[d] == shape[d]) {
-            coord[d] = 0;
-            ++coord[d - 1];
+          while (d > 0 && coord[axis_order[d]] == shape[axis_order[d]]) {
+            coord[axis_order[d]] = 0;
+            ++coord[axis_order[d - 1]];
             --d;
           }
         }
@@ -513,12 +515,13 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
     std::vector<int64_t> indptr_shapes(counts.begin(), counts.end() - 1);
     std::vector<int64_t> indices_shapes = counts;
 
-    for (int64_t column = 0; column < ndim; ++column)
+    for (int64_t column = 0; column < ndim; ++column) {
       RETURN_NOT_OK(
           indices_buffer_builders[column].Finish(&indices_buffers[column], true));
-
-    for (int64_t column = 0; column < ndim - 1; ++column)
+    }
+    for (int64_t column = 0; column < ndim - 1; ++column) {
       RETURN_NOT_OK(indptr_buffer_builders[column].Finish(&indptr_buffers[column], true));
+    }
 
     ARROW_ASSIGN_OR_RAISE(
         sparse_index, SparseCSFIndex::Make(index_value_type_, indices_shapes, axis_order,
@@ -665,8 +668,7 @@ namespace {
 template <typename TYPE, typename IndexValueType>
 void ExpandSparseCSFTensorValues(int64_t dimension, int64_t offset, int64_t first_ptr,
                                  int64_t last_ptr, const SparseCSFIndex* sparse_index,
-                                 const int64_t* raw_data,
-                                 const std::vector<int64_t> strides,
+                                 const TYPE* raw_data, const std::vector<int64_t> strides,
                                  const std::vector<int64_t> axis_order, TYPE* out) {
   int64_t ndim = axis_order.size();
 
@@ -675,14 +677,15 @@ void ExpandSparseCSFTensorValues(int64_t dimension, int64_t offset, int64_t firs
         offset + sparse_index->indices()[dimension]->Value<IndexValueType>({i}) *
                      strides[axis_order[dimension]];
 
-    if (dimension < ndim - 1)
+    if (dimension < ndim - 1) {
       ExpandSparseCSFTensorValues<TYPE, IndexValueType>(
           dimension + 1, tmp_offset,
           sparse_index->indptr()[dimension]->Value<IndexValueType>({i}),
           sparse_index->indptr()[dimension]->Value<IndexValueType>({i + 1}), sparse_index,
           raw_data, strides, axis_order, out);
-    else
-      out[tmp_offset] = static_cast<TYPE>(raw_data[i]);
+    } else {
+      out[tmp_offset] = raw_data[i];
+    }
   }
 }
 
@@ -703,8 +706,10 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
   std::fill_n(values, sparse_tensor->size(), static_cast<value_type>(0));
 
   std::vector<int64_t> strides(sparse_tensor->ndim(), 1);
-  for (int i = sparse_tensor->ndim() - 1; i > 0; --i)
+  for (int i = sparse_tensor->ndim() - 1; i > 0; --i) {
     strides[i - 1] *= strides[i] * sparse_tensor->shape()[i];
+  }
+  std::vector<int64_t> empty_strides;
 
   const auto raw_data = reinterpret_cast<const value_type*>(sparse_tensor->raw_data());
 
@@ -724,7 +729,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
         values[offset] = raw_data[i];
       }
       *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
-                                      sparse_tensor->shape());
+                                      sparse_tensor->shape(), empty_strides,
+                                      sparse_tensor->dim_names());
       return Status::OK();
     }
 
@@ -744,7 +750,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
         }
       }
       *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
-                                      sparse_tensor->shape());
+                                      sparse_tensor->shape(), empty_strides,
+                                      sparse_tensor->dim_names());
       return Status::OK();
     }
 
@@ -764,7 +771,8 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
         }
       }
       *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
-                                      sparse_tensor->shape());
+                                      sparse_tensor->shape(), empty_strides,
+                                      sparse_tensor->dim_names());
       return Status::OK();
     }
 
@@ -773,11 +781,11 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
           internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index());
 
       ExpandSparseCSFTensorValues<value_type, IndexValueType>(
-          0, 0, 0, sparse_index.indptr()[0]->size() - 1, &sparse_index,
-          reinterpret_cast<const int64_t*>(sparse_tensor->raw_data()), strides,
+          0, 0, 0, sparse_index.indptr()[0]->size() - 1, &sparse_index, raw_data, strides,
           sparse_index.axis_order(), values);
       *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
-                                      sparse_tensor->shape());
+                                      sparse_tensor->shape(), empty_strides,
+                                      sparse_tensor->dim_names());
       return Status::OK();
     }
   }
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index 6c9a64e61a1..2b5186acda2 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -915,29 +915,25 @@ template <typename IndexValueType>
 class TestSparseCSFTensorBase : public ::testing::Test {
  public:
   void SetUp() {
-    shape_ = {3, 3, 3, 4};
     dim_names_ = {"a", "b", "c", "d"};
-
-    // COO representation:
-    //   X[1, 1, 1, 2] := 1
-    //   X[1, 1, 1, 4] := 2
-    //   X[1, 2, 1, 1] := 3
-    //   X[1, 2, 1, 3] := 4
-    //   X[1, 2, 2, 1] := 5
-    //   X[2, 2, 2, 1] := 6
-    //   X[2, 2, 2, 2] := 7
-    //   X[2, 2, 2, 3] := 8
-
-    std::vector<int64_t> dense_values = {
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
-    auto dense_data = Buffer::Wrap(dense_values);
-    NumericTensor<Int64Type> dense_tensor(dense_data, shape_, {}, dim_names_);
-    ASSERT_OK_AND_ASSIGN(sparse_tensor_from_dense_,
-                         SparseCSFTensor::Make(
-                             dense_tensor, TypeTraits<IndexValueType>::type_singleton()));
+    shape_ = {2, 3, 4, 5};
+    int16_t dense_values[2][3][4][5] = {};  // zero-initialized
+
+    dense_values[0][0][0][1] = 1;
+    dense_values[0][0][0][2] = 2;
+    dense_values[0][1][0][0] = 3;
+    dense_values[0][1][0][2] = 4;
+    dense_values[0][1][1][0] = 5;
+    dense_values[1][1][1][0] = 6;
+    dense_values[1][1][1][1] = 7;
+    dense_values[1][1][1][2] = 8;
+
+    auto dense_buffer = Buffer::Wrap(dense_values, sizeof(dense_values));
+    Tensor dense_tensor_(int16(), dense_buffer, shape_, {}, dim_names_);
+    ASSERT_OK_AND_ASSIGN(
+        sparse_tensor_from_dense_,
+        SparseCSFTensor::Make(dense_tensor_,
+                              TypeTraits<IndexValueType>::type_singleton()));
   }
 
  protected:
@@ -948,279 +944,199 @@ class TestSparseCSFTensorBase : public ::testing::Test {
 
 class TestSparseCSFTensor : public TestSparseCSFTensorBase<Int64Type> {};
 
-TEST_F(TestSparseCSFTensor, CreateFromBuffers1) {
-  std::vector<std::vector<int64_t>> indptr_values = {{0, 2, 3, 5}};
-  std::vector<std::vector<int64_t>> indices_values = {{0, 1, 3}, {0, 3, 1, 3, 5}};
-  std::vector<int64_t> indices_shapes({3, 5});
-  std::vector<int64_t> axis_order = {0, 1};
-  std::vector<std::string> dim_names({"a", "b"});
-  std::vector<int64_t> data_values = {1, 3, 2, 4, 5};
-  std::vector<int64_t> values = {1, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 0,
-                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5};
-  std::vector<int64_t> shape({4, 6});
-
-  std::vector<std::shared_ptr<Buffer>> indptr_buffers;
-  std::vector<std::shared_ptr<Buffer>> indices_buffers;
-  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
-  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
-  for (auto& indptr : indptr_values) indptr_buffers.push_back(Buffer::Wrap(indptr));
-  for (auto& indices : indices_values) indices_buffers.push_back(Buffer::Wrap(indices));
-
-  Tensor tensor(int64(), buffer, shape, {}, this->dim_names_);
-
-  std::shared_ptr<SparseCSFIndex> sparse_index;
-  ASSERT_OK_AND_ASSIGN(sparse_index,
-                       SparseCSFIndex::Make(tensor.type(), tensor.type(), indices_shapes,
-                                            axis_order, indptr_buffers, indices_buffers));
-  std::shared_ptr<SparseCSFTensor> st = std::make_shared<SparseCSFTensor>(
-      sparse_index, int64(), data_buffer, shape, dim_names);
-  std::shared_ptr<Tensor> dt;
-  ASSERT_OK(st->ToTensor(&dt));
-  ASSERT_TRUE(tensor.Equals(*dt));
-}
-
-TEST_F(TestSparseCSFTensor, CreateFromBuffers2) {
-  std::vector<std::vector<int64_t>> indptr_values = {{0, 1, 2, 4, 5}};
-  std::vector<std::vector<int64_t>> indices_values = {{0, 1, 3, 5}, {0, 1, 0, 3, 3}};
-  std::vector<int64_t> indices_shapes({4, 5});
-  std::vector<int64_t> axis_order = {1, 0};
-  std::vector<std::string> dim_names({"a", "b"});
-  std::vector<int64_t> data_values = {1, 2, 3, 4, 5};
-  std::vector<int64_t> values = {1, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 0,
-                                 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5};
-  std::vector<int64_t> shape({4, 6});
-
-  std::vector<std::shared_ptr<Buffer>> indptr_buffers;
-  std::vector<std::shared_ptr<Buffer>> indices_buffers;
-  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(data_values);
-  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
-  for (auto& indptr : indptr_values) indptr_buffers.push_back(Buffer::Wrap(indptr));
-  for (auto& indices : indices_values) indices_buffers.push_back(Buffer::Wrap(indices));
-
-  Tensor tensor(int64(), buffer, shape, {}, this->dim_names_);
-
-  std::shared_ptr<SparseCSFIndex> sparse_index;
-  ASSERT_OK_AND_ASSIGN(sparse_index,
-                       SparseCSFIndex::Make(tensor.type(), tensor.type(), indices_shapes,
-                                            axis_order, indptr_buffers, indices_buffers));
-  std::shared_ptr<SparseCSFTensor> st = std::make_shared<SparseCSFTensor>(
-      sparse_index, int64(), data_buffer, shape, dim_names);
-  std::shared_ptr<Tensor> dt;
-  ASSERT_OK(st->ToTensor(&dt));
-  ASSERT_TRUE(tensor.Equals(*dt));
-}
-
 template <typename IndexValueType>
 class TestSparseCSFTensorForIndexValueType
     : public TestSparseCSFTensorBase<IndexValueType> {
  protected:
   std::shared_ptr<SparseCSFIndex> MakeSparseCSFIndex(
-      std::vector<typename IndexValueType::c_type>& indptr_values,
-      std::vector<typename IndexValueType::c_type>& indices_values,
-      const std::vector<int64_t>& indptr_offsets,
-      const std::vector<int64_t>& indices_offsets,
-      const std::vector<int64_t>& indptr_shape, const std::vector<int64_t>& indices_shape,
-      const std::vector<int64_t>& axis_order) const {
-    auto indptr_data = Buffer::Wrap(indptr_values);
-    auto indices_data = Buffer::Wrap(indices_values);
-    auto indptr =
-        std::make_shared<NumericTensor<IndexValueType>>(indptr_data, indptr_shape);
-    auto indices =
-        std::make_shared<NumericTensor<IndexValueType>>(indices_data, indices_shape);
-    return std::make_shared<SparseCSFIndex>(indptr, indices, indptr_offsets,
-                                            indices_offsets, axis_order);
+      const std::vector<int64_t> axis_order,
+      std::vector<std::vector<typename IndexValueType::c_type>>& indptr_values,
+      std::vector<std::vector<typename IndexValueType::c_type>>& indices_values) const {
+    int64_t ndim = axis_order.size();
+    std::vector<std::shared_ptr<Tensor>> indptr(ndim - 1);
+    std::vector<std::shared_ptr<Tensor>> indices(ndim);
+
+    for (int64_t i = 0; i < ndim - 1; ++i) {
+      indptr[i] = std::make_shared<Tensor>(
+          TypeTraits<IndexValueType>::type_singleton(), Buffer::Wrap(indptr_values[i]),
+          std::vector<int64_t>({static_cast<int64_t>(indptr_values[i].size())}));
+    }
+    for (int64_t i = 0; i < ndim; ++i) {
+      indices[i] = std::make_shared<Tensor>(
+          TypeTraits<IndexValueType>::type_singleton(), Buffer::Wrap(indices_values[i]),
+          std::vector<int64_t>({static_cast<int64_t>(indices_values[i].size())}));
+    }
+    return std::make_shared<SparseCSFIndex>(indptr, indices, axis_order);
   }
 
   template <typename CValueType>
   std::shared_ptr<SparseCSFTensor> MakeSparseTensor(
-      const std::shared_ptr<SparseCSFIndex>& si,
-      std::vector<CValueType>& sparse_values) const {
-    auto data = Buffer::Wrap(sparse_values);
-    return std::make_shared<SparseCSFTensor>(si,
-                                             CTypeTraits<CValueType>::type_singleton(),
-                                             data, this->shape_, this->dim_names_);
+      const std::shared_ptr<SparseCSFIndex>& si, std::vector<CValueType>& sparse_values,
+      const std::vector<int64_t> shape, const std::vector<std::string> dim_names) const {
+    auto data_buffer = Buffer::Wrap(sparse_values);
+    return std::make_shared<SparseCSFTensor>(
+        si, CTypeTraits<CValueType>::type_singleton(), data_buffer, shape, dim_names);
   }
 };
 
 TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType);
 
-TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestBufferToSparseTensor) {
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestCreateSparseTensor) {
   using IndexValueType = TypeParam;
   using c_index_value_type = typename IndexValueType::c_type;
 
+  std::vector<int64_t> shape = {2, 3, 4, 5};
+  std::vector<std::string> dim_names = {"a", "b", "c", "d"};
+  std::vector<int64_t> axis_order = {0, 1, 2, 3};
+  std::vector<int16_t> sparse_values = {1, 2, 3, 4, 5, 6, 7, 8};
   std::vector<std::vector<c_index_value_type>> indptr_values = {
       {0, 2, 3}, {0, 1, 3, 4}, {0, 2, 4, 5, 8}};
   std::vector<std::vector<c_index_value_type>> indices_values = {
-      {1, 2}, {1, 2, 2}, {1, 1, 2, 2}, {2, 3, 1, 3, 1, 1, 2, 3}};
-  std::vector<int64_t> indices_shapes = {2, 3, 4, 8};
-  std::vector<int64_t> axis_order = {0, 1, 2, 3};
-  std::vector<int64_t> sparse_values = {1, 2, 3, 4, 5, 6, 7, 8};
-  std::vector<int64_t> shape = {3, 3, 3, 4};
-  std::vector<std::string> dim_names = {"a", "b", "c", "d"};
+      {0, 1}, {0, 1, 1}, {0, 0, 1, 1}, {1, 2, 0, 2, 0, 0, 1, 2}};
 
-  std::shared_ptr<Buffer> data_buffer = Buffer::Wrap(sparse_values);
-  std::vector<std::shared_ptr<Buffer>> indptr_buffers;
-  std::vector<std::shared_ptr<Buffer>> indices_buffers;
-  for (auto& indptr : indptr_values) indptr_buffers.push_back(Buffer::Wrap(indptr));
-  for (auto& indices : indices_values) indices_buffers.push_back(Buffer::Wrap(indices));
-
-  std::shared_ptr<SparseCSFIndex> si;
-  ASSERT_OK_AND_ASSIGN(
-      si,
-      SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes,
-                           axis_order, indptr_buffers, indices_buffers));
-  std::shared_ptr<SparseCSFTensor> st =
-      std::make_shared<SparseCSFTensor>(si, int64(), data_buffer, shape, dim_names);
+  auto si = this->MakeSparseCSFIndex(axis_order, indptr_values, indices_values);
+  auto st = this->MakeSparseTensor(si, sparse_values, shape, dim_names);
 
   ASSERT_TRUE(st->Equals(*this->sparse_tensor_from_dense_));
 }
 
 TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestTensorToSparseTensor) {
   using IndexValueType = TypeParam;
-  std::vector<int64_t> shape = {3, 3, 3, 4};
-  std::vector<int64_t> values = {
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
-
-  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
-  Tensor tensor(int64(), buffer, shape, {}, this->dim_names_);
+  std::vector<int64_t> shape = {2, 3, 4, 5};
+  int16_t dense_values[2][3][4][5] = {};  // zero-initialized
+  dense_values[0][0][0][1] = 1;
+  dense_values[0][0][0][2] = 2;
+  dense_values[0][1][0][0] = 3;
+  dense_values[0][1][0][2] = 4;
+  dense_values[0][1][1][0] = 5;
+  dense_values[1][1][1][0] = 6;
+  dense_values[1][1][1][1] = 7;
+  dense_values[1][1][1][2] = 8;
+  auto dense_buffer = Buffer::Wrap(dense_values, sizeof(dense_values));
+  Tensor dense_tensor(int16(), dense_buffer, shape, {}, this->dim_names_);
 
   std::shared_ptr<SparseCSFTensor> sparse_tensor;
   ASSERT_OK_AND_ASSIGN(
       sparse_tensor,
-      SparseCSFTensor::Make(tensor, TypeTraits<IndexValueType>::type_singleton()));
+      SparseCSFTensor::Make(dense_tensor, TypeTraits<IndexValueType>::type_singleton()));
 
   ASSERT_EQ(8, sparse_tensor->non_zero_length());
   ASSERT_TRUE(sparse_tensor->is_mutable());
   ASSERT_TRUE(sparse_tensor->Equals(*this->sparse_tensor_from_dense_));
+  ASSERT_EQ(sparse_tensor->dim_names(), dense_tensor.dim_names());
 }
 
 TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorToTensor) {
-  std::vector<int64_t> shape = {3, 3, 3, 4};
-  std::vector<int64_t> dense_values = {
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
-  auto dense_data = Buffer::Wrap(dense_values);
-  Tensor tensor(int64(), dense_data, shape, {});
+  std::vector<int64_t> shape = {2, 3, 4, 5};
+  int16_t dense_values[2][3][4][5] = {};  // zero-initialized
+  dense_values[0][0][0][1] = 1;
+  dense_values[0][0][0][2] = 2;
+  dense_values[0][1][0][0] = 3;
+  dense_values[0][1][0][2] = 4;
+  dense_values[0][1][1][0] = 5;
+  dense_values[1][1][1][0] = 6;
+  dense_values[1][1][1][1] = 7;
+  dense_values[1][1][1][2] = 8;
+  auto dense_buffer = Buffer::Wrap(dense_values, sizeof(dense_values));
+  Tensor dense_tensor(int16(), dense_buffer, shape, {}, this->dim_names_);
 
-  std::shared_ptr<Tensor> dense_tensor;
-  ASSERT_OK(this->sparse_tensor_from_dense_->ToTensor(&dense_tensor));
-  ASSERT_TRUE(tensor.Equals(*dense_tensor));
+  std::shared_ptr<Tensor> dt;
+  ASSERT_OK(this->sparse_tensor_from_dense_->ToTensor(&dt));
+  ASSERT_TRUE(dense_tensor.Equals(*dt));
+  ASSERT_EQ(dense_tensor.dim_names(), dt->dim_names());
 }
 
-TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, CreateFromBuffers) {
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestRoundTrip) {
   using IndexValueType = TypeParam;
-  using c_index_value_type = typename IndexValueType::c_type;
-
-  std::vector<std::vector<c_index_value_type>> indptr_values_1 = {
-      {0, 2, 3}, {0, 1, 3, 4}, {0, 2, 4, 5, 8}};
-  std::vector<std::vector<c_index_value_type>> indices_values_1 = {
-      {1, 2}, {1, 2, 2}, {1, 1, 2, 2}, {2, 3, 1, 3, 1, 1, 2, 3}};
-  std::vector<int64_t> indices_shapes_1 = {2, 3, 4, 8};
-  std::vector<int64_t> axis_order_1 = {0, 1, 2, 3};
-  std::vector<int64_t> sparse_values_1 = {1, 2, 3, 4, 5, 6, 7, 8};
-  std::vector<int64_t> shape_1 = {3, 3, 3, 4};
-  std::vector<std::string> dim_names_1 = {"a", "b", "c", "d"};
-
-  std::vector<std::vector<c_index_value_type>> indptr_values_2 = {
-      {0, 2, 4, 6}, {0, 1, 2, 3, 4, 6, 7}, {0, 2, 3, 4, 5, 6, 7, 8}};
-  std::vector<std::vector<c_index_value_type>> indices_values_2 = {
-      {1, 2, 3}, {1, 2, 1, 2, 1, 2}, {2, 2, 1, 2, 1, 2, 2}, {1, 2, 2, 1, 2, 1, 1, 2}};
-  std::vector<int64_t> indices_shapes_2 = {3, 6, 7, 8};
-  std::vector<int64_t> axis_order_2 = {3, 0, 1, 2};
-  std::vector<int64_t> sparse_values_2 = {3, 5, 6, 1, 7, 2, 4, 8};
-  std::vector<int64_t> shape_2 = {5, 5, 5, 4};
-  std::vector<std::string> dim_names_2 = {"d", "a", "b", "c"};
-
-  std::vector<std::shared_ptr<Buffer>> indptr_buffers_1;
-  std::vector<std::shared_ptr<Buffer>> indices_buffers_1;
-  for (auto& indptr : indptr_values_1) indptr_buffers_1.push_back(Buffer::Wrap(indptr));
-  for (auto& indices : indices_values_1)
-    indices_buffers_1.push_back(Buffer::Wrap(indices));
-
-  std::vector<std::shared_ptr<Buffer>> indptr_buffers_2;
-  std::vector<std::shared_ptr<Buffer>> indices_buffers_2;
-  for (auto& indptr : indptr_values_2) indptr_buffers_2.push_back(Buffer::Wrap(indptr));
-  for (auto& indices : indices_values_2)
-    indices_buffers_2.push_back(Buffer::Wrap(indices));
-
-  std::vector<int64_t> dense_values_1 = {
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8};
-
-  std::vector<int64_t> dense_values_2 = {
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 3, 0, 4, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-
-  std::shared_ptr<Buffer> dense_buffer_1 = Buffer::Wrap(dense_values_1);
-  std::shared_ptr<Buffer> dense_buffer_2 = Buffer::Wrap(dense_values_2);
-  Tensor tensor_1(int64(), dense_buffer_1, shape_1, {}, dim_names_1);
-  Tensor tensor_2(int64(), dense_buffer_2, shape_2, {}, dim_names_2);
-  std::shared_ptr<Buffer> sparse_buffer_1 = Buffer::Wrap(sparse_values_1);
-  std::shared_ptr<Buffer> sparse_buffer_2 = Buffer::Wrap(sparse_values_2);
-
-  std::shared_ptr<SparseCSFIndex> si_1;
-  std::shared_ptr<SparseCSFIndex> si_2;
-  std::shared_ptr<SparseCSFIndex> si_3;
 
+  std::shared_ptr<Tensor> dt;
+  ASSERT_OK(this->sparse_tensor_from_dense_->ToTensor(&dt));
+  std::shared_ptr<SparseCSFTensor> st;
   ASSERT_OK_AND_ASSIGN(
-      si_1,
-      SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes_1,
-                           axis_order_1, indptr_buffers_1, indices_buffers_1));
-  ASSERT_OK_AND_ASSIGN(
-      si_2,
-      SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes_2,
-                           axis_order_2, indptr_buffers_2, indices_buffers_2));
-  ASSERT_OK_AND_ASSIGN(
-      si_3,
-      SparseCSFIndex::Make(TypeTraits<IndexValueType>::type_singleton(), indices_shapes_2,
-                           axis_order_2, indptr_buffers_2, indices_buffers_2));
-
-  std::shared_ptr<SparseCSFTensor> st_1 = std::make_shared<SparseCSFTensor>(
-      si_1, int64(), sparse_buffer_1, shape_1, dim_names_1);
-  std::shared_ptr<SparseCSFTensor> st_2 = std::make_shared<SparseCSFTensor>(
-      si_2, int64(), sparse_buffer_2, shape_1, dim_names_2);
-  std::shared_ptr<SparseCSFTensor> st_3 = std::make_shared<SparseCSFTensor>(
-      si_3, int64(), sparse_buffer_2, shape_2, dim_names_2);
-
-  std::shared_ptr<Tensor> dt_1;
-  std::shared_ptr<Tensor> dt_2;
-  std::shared_ptr<Tensor> dt_3;
+      st, SparseCSFTensor::Make(*dt, TypeTraits<IndexValueType>::type_singleton()));
+
+  ASSERT_TRUE(st->Equals(*this->sparse_tensor_from_dense_));
+}
+
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestAlternativeAxisOrder) {
+  using IndexValueType = TypeParam;
+  using c_index_value_type = typename IndexValueType::c_type;
+
+  std::vector<int16_t> dense_values = {1, 0, 0, 3, 0, 0, 0, 2, 0, 0, 0, 0,
+                                       0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5};
+  std::vector<int64_t> shape = {4, 6};
+  std::vector<std::string> dim_names = {"a", "b"};
+  std::shared_ptr<Buffer> dense_buffer = Buffer::Wrap(dense_values);
+  Tensor tensor(int16(), dense_buffer, shape, {}, dim_names);
+
+  // Axis order 1
+  std::vector<int64_t> axis_order_1 = {0, 1};
+  std::vector<int16_t> sparse_values_1 = {1, 3, 2, 4, 5};
+  std::vector<std::vector<c_index_value_type>> indptr_values_1 = {{0, 2, 3, 5}};
+  std::vector<std::vector<c_index_value_type>> indices_values_1 = {{0, 1, 3},
+                                                                   {0, 3, 1, 3, 5}};
+  auto si_1 = this->MakeSparseCSFIndex(axis_order_1, indptr_values_1, indices_values_1);
+  auto st_1 = this->MakeSparseTensor(si_1, sparse_values_1, shape, dim_names);
+
+  // Axis order 2
+  std::vector<int64_t> axis_order_2 = {1, 0};
+  std::vector<int16_t> sparse_values_2 = {1, 2, 3, 4, 5};
+  std::vector<std::vector<c_index_value_type>> indptr_values_2 = {{0, 1, 2, 4, 5}};
+  std::vector<std::vector<c_index_value_type>> indices_values_2 = {{0, 1, 3, 5},
+                                                                   {0, 1, 0, 3, 3}};
+  auto si_2 = this->MakeSparseCSFIndex(axis_order_2, indptr_values_2, indices_values_2);
+  auto st_2 = this->MakeSparseTensor(si_2, sparse_values_2, shape, dim_names);
+
+  std::shared_ptr<Tensor> dt_1, dt_2;
   ASSERT_OK(st_1->ToTensor(&dt_1));
   ASSERT_OK(st_2->ToTensor(&dt_2));
-  ASSERT_OK(st_3->ToTensor(&dt_3));
 
+  ASSERT_FALSE(st_1->Equals(*st_2));
   ASSERT_TRUE(dt_1->Equals(*dt_2));
-  ASSERT_FALSE(dt_1->Equals(*dt_3));
-  ASSERT_TRUE(tensor_1.Equals(*dt_1));
-  ASSERT_TRUE(tensor_2.Equals(*dt_3));
+  ASSERT_TRUE(dt_1->Equals(tensor));
+}
+
+TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestNonAscendingShape) {
+  using IndexValueType = TypeParam;
+  using c_index_value_type = typename IndexValueType::c_type;
+
+  std::vector<int64_t> shape = {5, 2, 3, 4};
+  int16_t dense_values[5][2][3][4] = {};  // zero-initialized
+  dense_values[0][0][0][1] = 1;
+  dense_values[0][0][0][2] = 2;
+  dense_values[0][1][0][0] = 3;
+  dense_values[0][1][0][2] = 4;
+  dense_values[0][1][1][0] = 5;
+  dense_values[1][1][1][0] = 6;
+  dense_values[1][1][1][1] = 7;
+  dense_values[1][1][1][2] = 8;
+  auto dense_buffer = Buffer::Wrap(dense_values, sizeof(dense_values));
+  Tensor dense_tensor(int16(), dense_buffer, shape, {}, this->dim_names_);
+
+  std::shared_ptr<SparseCSFTensor> sparse_tensor;
+  ASSERT_OK_AND_ASSIGN(
+      sparse_tensor,
+      SparseCSFTensor::Make(dense_tensor, TypeTraits<IndexValueType>::type_singleton()));
+
+  std::vector<std::vector<c_index_value_type>> indptr_values = {
+      {0, 1, 3}, {0, 2, 4, 7}, {0, 1, 2, 3, 4, 6, 7, 8}};
+  std::vector<std::vector<c_index_value_type>> indices_values = {
+      {0, 1}, {0, 0, 1}, {1, 2, 0, 2, 0, 1, 2}, {0, 0, 0, 0, 0, 1, 1, 1}};
+  std::vector<int64_t> axis_order = {1, 2, 3, 0};
+  std::vector<int16_t> sparse_values = {1, 2, 3, 4, 5, 6, 7, 8};
+  auto si = this->MakeSparseCSFIndex(axis_order, indptr_values, indices_values);
+  auto st = this->MakeSparseTensor(si, sparse_values, shape, this->dim_names_);
+
+  std::shared_ptr<Tensor> dt;
+  ASSERT_OK(st->ToTensor(&dt));
+  ASSERT_TRUE(dt->Equals(dense_tensor));
+  ASSERT_TRUE(st->Equals(*sparse_tensor));
 }
 
-REGISTER_TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType, TestBufferToSparseTensor,
+REGISTER_TYPED_TEST_CASE_P(TestSparseCSFTensorForIndexValueType, TestCreateSparseTensor,
                            TestTensorToSparseTensor, TestSparseTensorToTensor,
-                           CreateFromBuffers);
+                           TestAlternativeAxisOrder, TestNonAscendingShape,
+                           TestRoundTrip);
 
 INSTANTIATE_TYPED_TEST_CASE_P(TestInt8, TestSparseCSFTensorForIndexValueType, Int8Type);
 INSTANTIATE_TYPED_TEST_CASE_P(TestUInt8, TestSparseCSFTensorForIndexValueType, UInt8Type);
diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index 7ed302141ab..9c8ddae0b7c 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -114,71 +114,9 @@ table SparseMatrixIndexCSX {
   indicesBuffer: Buffer (required);
 }
 
-/// Compressed Sparse Fiber (CSF) sparse tensor index.
-table SparseTensorIndexCSF {
-  /// CSF is a generalization of compressed sparse row (CSR) index.
-  /// See [smith2017knl]: http://shaden.io/pub-files/smith2017knl.pdf
-  ///
-  /// CSF recursively compresses each mode of the tensor into a set
-  /// of prefix trees. Each path from a root to leaf forms one tensor
-  /// non-zero index. CSF is implemented with two buffers and three arrays.
-  ///
-  /// For example, let X be a 3x3x3x4 tensor, and it has the following
-  /// 8 non-zero values:
-  ///
-  ///   X[1, 1, 1, 2] := 1
-  ///   X[1, 1, 1, 3] := 2
-  ///   X[1, 2, 1, 1] := 3
-  ///   X[1, 2, 1, 3] := 4
-  ///   X[1, 2, 2, 1] := 5
-  ///   X[2, 2, 2, 1] := 6
-  ///   X[2, 2, 2, 2] := 7
-  ///   X[2, 2, 2, 3] := 8
-  ///
-  /// As a prefix tree this would be represented be:
-  ///
-  ///         1          2
-  ///        / \         |
-  ///       1   2        2
-  ///      /   / \       |
-  ///     1   1   2      2
-  ///    /|  /|   |    /| |
-  ///   2 3 1 3   1   1 2 3
-
-  /// The type of values in indptrBuffers
-  indptrType: Int (required);
-
-  /// indptrBuffers stores the sparsity structure.
-  /// Position in the indptrBuffers vector signifies the dimension.
-  /// For example, the indptrBuffers for the above X is:
-  ///
-  ///   indptrBuffer(X) = [[0, 2, 3], [0, 1, 3, 4], [0, 2, 4, 5, 8]].
-  ///
-  indptrBuffers: [Buffer] (required);
-
-  /// The type of values in indicesBuffers
-  indicesType: Int (required);
-
-  /// indicesBuffers stores the label of each node.
-  /// Position in the indicesBuffers vector signifies the dimension.
-  /// For example, the indicesBuffers for the above X is:
-  ///
-  ///   indicesBuffer(X) = [[1, 2], [1, 2, 2], [1, 1, 2, 2], [2, 3, 1, 3, 1, 1, 2, 3]].
-  ///
-  indicesBuffers: [Buffer] (required);
-
-  /// axisOrder stores the sequence in which dimensions were traversed to produce the prefix tree.
-  /// For example, the axisOrder for the above X is:
-  ///
-  ///   axisOrder(X) = [0, 1, 2, 3].
-  ///
-  axisOrder: [Int] (required);
-}
-
 union SparseTensorIndex {
   SparseTensorIndexCOO,
-  SparseMatrixIndexCSX,
-  SparseTensorIndexCSF
+  SparseMatrixIndexCSX
 }
 
 table SparseTensor {

From 11b81bb044bc99864977a7596e44fdb77e8642de Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Mon, 3 Feb 2020 14:40:05 +0100
Subject: [PATCH 16/18] Factoring out index incrementing for dense to COO and
 CSF indices.

---
 cpp/src/arrow/sparse_tensor.cc | 54 ++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 19 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 83e07baeb5a..609cd2321d6 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -57,6 +57,37 @@ class SparseTensorConverter {
   Status Convert() { return Status::Invalid("Unsupported sparse index"); }
 };
 
+// ----------------------------------------------------------------------
+// IncrementIndex for SparseCOOIndex and SparseCSFIndex
+
+void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t> shape) {
+  const int64_t ndim = shape.size();
+  ++coord[ndim - 1];
+  if (coord[ndim - 1] == shape[ndim - 1]) {
+    int64_t d = ndim - 1;
+    while (d > 0 && coord[d] == shape[d]) {
+      coord[d] = 0;
+      ++coord[d - 1];
+      --d;
+    }
+  }
+}
+
+void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t> shape,
+                    std::vector<int64_t> axis_order) {
+  const int64_t ndim = shape.size();
+  const int64_t last_axis = axis_order[ndim - 1];
+  ++coord[last_axis];
+  if (coord[last_axis] == shape[last_axis]) {
+    int64_t d = ndim - 1;
+    while (d > 0 && coord[axis_order[d]] == shape[axis_order[d]]) {
+      coord[axis_order[d]] = 0;
+      ++coord[axis_order[d - 1]];
+      --d;
+    }
+  }
+}
+
 // ----------------------------------------------------------------------
 // SparseTensorConverter for SparseCOOIndex
 
@@ -130,15 +161,8 @@ class SparseTensorConverter<TYPE, SparseCOOIndex>
             *indices++ = static_cast<c_index_value_type>(coord[i]);
           }
         }
-        // increment index
-        ++coord[ndim - 1];
-        if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) {
-          int64_t d = ndim - 1;
-          while (d > 0 && coord[d] == shape[d]) {
-            coord[d] = 0;
-            ++coord[d - 1];
-            --d;
-          }
+        if (n > 1) {
+          IncrementIndex(coord, shape);
         }
       }
     }
@@ -488,16 +512,8 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
           }
           previous_coord = coord;
         }
-        // increment index
-        int64_t last_axis = axis_order[ndim - 1];
-        ++coord[last_axis];
-        if (n > 1 && coord[last_axis] == shape[last_axis]) {
-          int64_t d = ndim - 1;
-          while (d > 0 && coord[axis_order[d]] == shape[axis_order[d]]) {
-            coord[axis_order[d]] = 0;
-            ++coord[axis_order[d - 1]];
-            --d;
-          }
+        if (n > 1) {
+          IncrementIndex(coord, shape, axis_order);
         }
       }
     }

From 1b922f6ae8a719b0050ec3674dd32085e5b48e3e Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Tue, 4 Feb 2020 22:24:41 +0100
Subject: [PATCH 17/18] Implementing review feedback.

---
 cpp/src/arrow/python/serialize.cc   |  5 ++-
 cpp/src/arrow/sparse_tensor.cc      | 60 ++++++++++++++++-------------
 cpp/src/arrow/sparse_tensor.h       | 12 ++----
 cpp/src/arrow/sparse_tensor_test.cc | 34 ++++------------
 4 files changed, 47 insertions(+), 64 deletions(-)

diff --git a/cpp/src/arrow/python/serialize.cc b/cpp/src/arrow/python/serialize.cc
index 88d763b7877..06c85648591 100644
--- a/cpp/src/arrow/python/serialize.cc
+++ b/cpp/src/arrow/python/serialize.cc
@@ -664,11 +664,12 @@ Status CountSparseTensors(
       case SparseTensorFormat::CSR:
         ++num_csr;
         break;
-      case SparseTensorFormat::CSC:
-        // TODO(mrkn): support csc
       case SparseTensorFormat::CSF:
         ++num_csf;
         break;
+      case SparseTensorFormat::CSC:
+        // TODO(mrkn): support csc
+        break;
     }
   }
 
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 609cd2321d6..0a6a91ab9d4 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -60,7 +60,8 @@ class SparseTensorConverter {
 // ----------------------------------------------------------------------
 // IncrementIndex for SparseCOOIndex and SparseCSFIndex
 
-void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t> shape) {
+inline void IncrementIndex(std::vector<int64_t>& coord,
+                           const std::vector<int64_t>& shape) {
   const int64_t ndim = shape.size();
   ++coord[ndim - 1];
   if (coord[ndim - 1] == shape[ndim - 1]) {
@@ -73,8 +74,8 @@ void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t> shap
   }
 }
 
-void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t> shape,
-                    std::vector<int64_t> axis_order) {
+inline void IncrementIndex(std::vector<int64_t>& coord, const std::vector<int64_t>& shape,
+                           const std::vector<int64_t>& axis_order) {
   const int64_t ndim = shape.size();
   const int64_t last_axis = axis_order[ndim - 1];
   ++coord[last_axis];
@@ -161,9 +162,7 @@ class SparseTensorConverter<TYPE, SparseCOOIndex>
             *indices++ = static_cast<c_index_value_type>(coord[i]);
           }
         }
-        if (n > 1) {
-          IncrementIndex(coord, shape);
-        }
+        IncrementIndex(coord, shape);
       }
     }
 
@@ -496,11 +495,9 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
 
           for (int64_t i = 0; i < ndim; ++i) {
             int64_t dimension = axis_order[i];
-            bool change = coord[dimension] != previous_coord[dimension];
-
-            if (tree_split || change) {
-              if (change) tree_split = true;
 
+            tree_split = tree_split || (coord[dimension] != previous_coord[dimension]);
+            if (tree_split) {
               if (i < ndim - 1) {
                 RETURN_NOT_OK(indptr_buffer_builders[i].Append(
                     static_cast<c_index_value_type>(counts[i + 1])));
@@ -512,9 +509,7 @@ class SparseTensorConverter<TYPE, SparseCSFIndex>
           }
           previous_coord = coord;
         }
-        if (n > 1) {
-          IncrementIndex(coord, shape, axis_order);
-        }
+        IncrementIndex(coord, shape, axis_order);
       }
     }
 
@@ -682,25 +677,26 @@ Status MakeSparseTensorFromTensor(const Tensor& tensor,
 namespace {
 
 template <typename TYPE, typename IndexValueType>
-void ExpandSparseCSFTensorValues(int64_t dimension, int64_t offset, int64_t first_ptr,
-                                 int64_t last_ptr, const SparseCSFIndex* sparse_index,
-                                 const TYPE* raw_data, const std::vector<int64_t> strides,
-                                 const std::vector<int64_t> axis_order, TYPE* out) {
+void ExpandSparseCSFTensorValues(int64_t dimension, int64_t dense_offset,
+                                 int64_t first_ptr, int64_t last_ptr,
+                                 const SparseCSFIndex& sparse_index, const TYPE* raw_data,
+                                 const std::vector<int64_t>& strides,
+                                 const std::vector<int64_t>& axis_order, TYPE* out) {
   int64_t ndim = axis_order.size();
 
   for (int64_t i = first_ptr; i < last_ptr; ++i) {
-    int64_t tmp_offset =
-        offset + sparse_index->indices()[dimension]->Value<IndexValueType>({i}) *
-                     strides[axis_order[dimension]];
+    int64_t tmp_dense_offset =
+        dense_offset + sparse_index.indices()[dimension]->Value<IndexValueType>({i}) *
+                           strides[axis_order[dimension]];
 
     if (dimension < ndim - 1) {
       ExpandSparseCSFTensorValues<TYPE, IndexValueType>(
-          dimension + 1, tmp_offset,
-          sparse_index->indptr()[dimension]->Value<IndexValueType>({i}),
-          sparse_index->indptr()[dimension]->Value<IndexValueType>({i + 1}), sparse_index,
+          dimension + 1, tmp_dense_offset,
+          sparse_index.indptr()[dimension]->Value<IndexValueType>({i}),
+          sparse_index.indptr()[dimension]->Value<IndexValueType>({i + 1}), sparse_index,
           raw_data, strides, axis_order, out);
     } else {
-      out[tmp_offset] = raw_data[i];
+      out[tmp_dense_offset] = raw_data[i];
     }
   }
 }
@@ -797,7 +793,7 @@ Status MakeTensorFromSparseTensor(MemoryPool* pool, const SparseTensor* sparse_t
           internal::checked_cast<const SparseCSFIndex&>(*sparse_tensor->sparse_index());
 
       ExpandSparseCSFTensorValues<value_type, IndexValueType>(
-          0, 0, 0, sparse_index.indptr()[0]->size() - 1, &sparse_index, raw_data, strides,
+          0, 0, 0, sparse_index.indptr()[0]->size() - 1, sparse_index, raw_data, strides,
           sparse_index.axis_order(), values);
       *out = std::make_shared<Tensor>(sparse_tensor->type(), values_buffer,
                                       sparse_tensor->shape(), empty_strides,
@@ -995,11 +991,11 @@ inline Status CheckSparseCSFIndexValidity(const std::shared_ptr<DataType>& indpt
   }
   if (num_indptrs + 1 != num_indices) {
     return Status::Invalid(
-        "Length of indices must be equal to length of inptrs + 1 for SparseCSFIndex.");
+        "Length of indices must be equal to length of indptrs + 1 for SparseCSFIndex.");
   }
   if (axis_order_size != num_indices) {
     return Status::Invalid(
-        "Length of indices must be equal number of dimensions for SparseCSFIndex.");
+        "Length of indices must be equal to number of dimensions for SparseCSFIndex.");
   }
   return Status::OK();
 }
@@ -1045,6 +1041,25 @@ SparseCSFIndex::SparseCSFIndex(std::vector<std::shared_ptr<Tensor>>& indptr,
 
 std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); }
 
+// Two CSF indices are equal when they hold the same number of indices and indptr
+// tensors, every corresponding pair of tensors compares equal, and their axis
+// orders match.
+bool SparseCSFIndex::Equals(const SparseCSFIndex& other) const {
+  // Compare the tensor counts first: a mismatch means the indices differ, and the
+  // loops below subscript `other` using this object's sizes.
+  if (indices().size() != other.indices().size() ||
+      indptr().size() != other.indptr().size()) {
+    return false;
+  }
+  for (int64_t i = 0; i < static_cast<int64_t>(indices().size()); ++i) {
+    if (!indices()[i]->Equals(*other.indices()[i])) return false;
+  }
+  for (int64_t i = 0; i < static_cast<int64_t>(indptr().size()); ++i) {
+    if (!indptr()[i]->Equals(*other.indptr()[i])) return false;
+  }
+  return axis_order() == other.axis_order();
+}
+
 // ----------------------------------------------------------------------
 // SparseTensor
 
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index 64e730b78d3..4071f31c5ca 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -372,10 +372,10 @@ class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIn
                           std::vector<std::shared_ptr<Tensor>>& indices,
                           const std::vector<int64_t>& axis_order);
 
-  /// \brief Return a 1D tensor of indptr vector
+  /// \brief Return a 1D vector of indptr tensors
   const std::vector<std::shared_ptr<Tensor>>& indptr() const { return indptr_; }
 
-  /// \brief Return a 1D tensor of indices vector
+  /// \brief Return a 1D vector of indices tensors
   const std::vector<std::shared_ptr<Tensor>>& indices() const { return indices_; }
 
   /// \brief Return a 1D vector specifying the order of axes
@@ -385,13 +385,7 @@ class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIn
   std::string ToString() const override;
 
   /// \brief Return whether the CSF indices are equal
-  bool Equals(const SparseCSFIndex& other) const {
-    for (int64_t i = 0; i < static_cast<int64_t>(indices().size()); ++i)
-      if (!indices()[i]->Equals(*other.indices()[i])) return false;
-    for (int64_t i = 0; i < static_cast<int64_t>(indptr().size()); ++i)
-      if (!indptr()[i]->Equals(*other.indptr()[i])) return false;
-    return axis_order() == other.axis_order();
-  }
+  bool Equals(const SparseCSFIndex& other) const;
 
  protected:
   std::vector<std::shared_ptr<Tensor>> indptr_;
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index 2b5186acda2..d6e88324934 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -26,11 +26,11 @@
 
 #include <gtest/gtest.h>
 
-#include <arrow/util/sort.h>
 #include "arrow/sparse_tensor.h"
 #include "arrow/testing/gtest_util.h"
 #include "arrow/testing/util.h"
 #include "arrow/type.h"
+#include "arrow/util/sort.h"
 
 namespace arrow {
 
@@ -949,7 +949,7 @@ class TestSparseCSFTensorForIndexValueType
     : public TestSparseCSFTensorBase<IndexValueType> {
  protected:
   std::shared_ptr<SparseCSFIndex> MakeSparseCSFIndex(
-      const std::vector<int64_t> axis_order,
+      const std::vector<int64_t>& axis_order,
       std::vector<std::vector<typename IndexValueType::c_type>>& indptr_values,
       std::vector<std::vector<typename IndexValueType::c_type>>& indices_values) const {
     int64_t ndim = axis_order.size();
@@ -972,7 +972,8 @@ class TestSparseCSFTensorForIndexValueType
   template <typename CValueType>
   std::shared_ptr<SparseCSFTensor> MakeSparseTensor(
       const std::shared_ptr<SparseCSFIndex>& si, std::vector<CValueType>& sparse_values,
-      const std::vector<int64_t> shape, const std::vector<std::string> dim_names) const {
+      const std::vector<int64_t>& shape,
+      const std::vector<std::string>& dim_names) const {
     auto data_buffer = Buffer::Wrap(sparse_values);
     return std::make_shared<SparseCSFTensor>(
         si, CTypeTraits<CValueType>::type_singleton(), data_buffer, shape, dim_names);
@@ -1001,29 +1002,10 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestCreateSparseTensor) {
 }
 
 TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestTensorToSparseTensor) {
-  using IndexValueType = TypeParam;
-  std::vector<int64_t> shape = {2, 3, 4, 5};
-  int16_t dense_values[2][3][4][5] = {};  // zero-initialized
-  dense_values[0][0][0][1] = 1;
-  dense_values[0][0][0][2] = 2;
-  dense_values[0][1][0][0] = 3;
-  dense_values[0][1][0][2] = 4;
-  dense_values[0][1][1][0] = 5;
-  dense_values[1][1][1][0] = 6;
-  dense_values[1][1][1][1] = 7;
-  dense_values[1][1][1][2] = 8;
-  auto dense_buffer = Buffer::Wrap(dense_values, sizeof(dense_values));
-  Tensor dense_tensor(int16(), dense_buffer, shape, {}, this->dim_names_);
-
-  std::shared_ptr<SparseCSFTensor> sparse_tensor;
-  ASSERT_OK_AND_ASSIGN(
-      sparse_tensor,
-      SparseCSFTensor::Make(dense_tensor, TypeTraits<IndexValueType>::type_singleton()));
-
-  ASSERT_EQ(8, sparse_tensor->non_zero_length());
-  ASSERT_TRUE(sparse_tensor->is_mutable());
-  ASSERT_TRUE(sparse_tensor->Equals(*this->sparse_tensor_from_dense_));
-  ASSERT_EQ(sparse_tensor->dim_names(), dense_tensor.dim_names());
+  std::vector<std::string> dim_names = {"a", "b", "c", "d"};
+  ASSERT_EQ(8, this->sparse_tensor_from_dense_->non_zero_length());
+  ASSERT_TRUE(this->sparse_tensor_from_dense_->is_mutable());
+  ASSERT_EQ(dim_names, this->sparse_tensor_from_dense_->dim_names());
 }
 
 TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestSparseTensorToTensor) {

From 9ca93ab60da5ddfe8ab2d83615cc905b55658c47 Mon Sep 17 00:00:00 2001
From: Rok <rok@mihevc.org>
Date: Wed, 5 Feb 2020 14:07:56 +0100
Subject: [PATCH 18/18] Implementing review feedback.

---
 cpp/src/arrow/sparse_tensor.cc      |  8 ++++----
 cpp/src/arrow/sparse_tensor.h       | 12 ++++++------
 cpp/src/arrow/sparse_tensor_test.cc |  5 +++--
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 0a6a91ab9d4..549223c0798 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -1006,8 +1006,8 @@ Result<std::shared_ptr<SparseCSFIndex>> SparseCSFIndex::Make(
     const std::shared_ptr<DataType>& indptr_type,
     const std::shared_ptr<DataType>& indices_type,
     const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
-    std::vector<std::shared_ptr<Buffer>> indptr_data,
-    std::vector<std::shared_ptr<Buffer>> indices_data) {
+    const std::vector<std::shared_ptr<Buffer>>& indptr_data,
+    const std::vector<std::shared_ptr<Buffer>>& indices_data) {
   int64_t ndim = axis_order.size();
   std::vector<std::shared_ptr<Tensor>> indptr(ndim - 1);
   std::vector<std::shared_ptr<Tensor>> indices(ndim);
@@ -1027,8 +1027,8 @@ Result<std::shared_ptr<SparseCSFIndex>> SparseCSFIndex::Make(
 }
 
 // Constructor with two index vectors
-SparseCSFIndex::SparseCSFIndex(std::vector<std::shared_ptr<Tensor>>& indptr,
-                               std::vector<std::shared_ptr<Tensor>>& indices,
+SparseCSFIndex::SparseCSFIndex(const std::vector<std::shared_ptr<Tensor>>& indptr,
+                               const std::vector<std::shared_ptr<Tensor>>& indices,
                                const std::vector<int64_t>& axis_order)
     : SparseIndexBase(indices.back()->size()),
       indptr_(indptr),
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index 4071f31c5ca..33a53761e14 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -354,22 +354,22 @@ class ARROW_EXPORT SparseCSFIndex : public internal::SparseIndexBase<SparseCSFIn
       const std::shared_ptr<DataType>& indptr_type,
       const std::shared_ptr<DataType>& indices_type,
       const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
-      std::vector<std::shared_ptr<Buffer>> indptr_data,
-      std::vector<std::shared_ptr<Buffer>> indices_data);
+      const std::vector<std::shared_ptr<Buffer>>& indptr_data,
+      const std::vector<std::shared_ptr<Buffer>>& indices_data);
 
   /// \brief Make SparseCSFIndex from raw properties
   static Result<std::shared_ptr<SparseCSFIndex>> Make(
       const std::shared_ptr<DataType>& indices_type,
       const std::vector<int64_t>& indices_shapes, const std::vector<int64_t>& axis_order,
-      std::vector<std::shared_ptr<Buffer>> indptr_data,
-      std::vector<std::shared_ptr<Buffer>> indices_data) {
+      const std::vector<std::shared_ptr<Buffer>>& indptr_data,
+      const std::vector<std::shared_ptr<Buffer>>& indices_data) {
     return Make(indices_type, indices_type, indices_shapes, axis_order, indptr_data,
                 indices_data);
   }
 
   /// \brief Construct SparseCSFIndex from two index vectors
-  explicit SparseCSFIndex(std::vector<std::shared_ptr<Tensor>>& indptr,
-                          std::vector<std::shared_ptr<Tensor>>& indices,
+  explicit SparseCSFIndex(const std::vector<std::shared_ptr<Tensor>>& indptr,
+                          const std::vector<std::shared_ptr<Tensor>>& indices,
                           const std::vector<int64_t>& axis_order);
 
   /// \brief Return a 1D vector of indptr tensors
diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc
index d6e88324934..45cb8dcc8f3 100644
--- a/cpp/src/arrow/sparse_tensor_test.cc
+++ b/cpp/src/arrow/sparse_tensor_test.cc
@@ -950,8 +950,9 @@ class TestSparseCSFTensorForIndexValueType
  protected:
   std::shared_ptr<SparseCSFIndex> MakeSparseCSFIndex(
       const std::vector<int64_t>& axis_order,
-      std::vector<std::vector<typename IndexValueType::c_type>>& indptr_values,
-      std::vector<std::vector<typename IndexValueType::c_type>>& indices_values) const {
+      const std::vector<std::vector<typename IndexValueType::c_type>>& indptr_values,
+      const std::vector<std::vector<typename IndexValueType::c_type>>& indices_values)
+      const {
     int64_t ndim = axis_order.size();
     std::vector<std::shared_ptr<Tensor>> indptr(ndim - 1);
     std::vector<std::shared_ptr<Tensor>> indices(ndim);