From c3bc6edfa553a7ef6ea8332a77d5f49b1ed4fc8f Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Tue, 11 Sep 2018 11:51:49 +0900
Subject: [PATCH 01/40] Add tentative SparseTensor format

---
 format/Message.fbs      |  5 ++--
 format/SparseTensor.fbs | 52 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 format/SparseTensor.fbs

diff --git a/format/Message.fbs b/format/Message.fbs
index 830718139d8..d7dcd7647fd 100644
--- a/format/Message.fbs
+++ b/format/Message.fbs
@@ -17,6 +17,7 @@
 
 include "Schema.fbs";
 include "Tensor.fbs";
+include "SparseTensor.fbs";
 
 namespace org.apache.arrow.flatbuf;
 
@@ -87,7 +88,7 @@ table DictionaryBatch {
 /// which may include experimental metadata types. For maximum compatibility,
 /// it is best to send data using RecordBatch
 union MessageHeader {
-  Schema, DictionaryBatch, RecordBatch, Tensor
+  Schema, DictionaryBatch, RecordBatch, Tensor, SparseTensor
 }
 
 table Message {
@@ -96,4 +97,4 @@ table Message {
   bodyLength: long;
 }
 
-root_type Message;
\ No newline at end of file
+root_type Message;
diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
new file mode 100644
index 00000000000..66ec098f41e
--- /dev/null
+++ b/format/SparseTensor.fbs
@@ -0,0 +1,52 @@
+include "Tensor.fbs"
+
+namespace org.apache.arrow.flatbuf;
+
+/// Coordinate format.
+table SparseTensorIndexCOO {
+  /// COO's index list is represented as an NxM matrix,
+  /// where N is the number of non-zero values,
+  /// and M is the number of dimensions of a sparse tensor.
+  /// indicesBuffer stores the location and size of this index matrix.
+  /// The type of index value is long, so the stride for the index matrix is unnecessary.
+  indicesBuffer: Buffer
+};
+
+/// Compressed Sparse Row format, that is matrix-specific.
+table SparseMatrixIndexCSR {
+  /// This array represents the range of the rows.
+  /// The ith row spans from indptr[i] to indptr[i+1] in the data.
+  /// The length of this array is 1 + (the number of rows).
+  indptr: [long]
+
+  /// indicesBuffer stores the location and size of the array that
+  /// contains the column indices of the corresponding non-zero values.
+  /// The type of index value is long.
+  indicesBuffer: Buffer
+};
+
+union SparseTensorIndex {
+  SparseTensorIndexCOO,
+  SparseMatrixIndexCSR
+};
+
+table SparseTensor {
+  /// The type of data contained in a value cell.
+  /// Currently only fixed-width value types are supported,
+  /// no strings or nested types.
+  type: Type;
+
+  /// The dimensions of the tensor, optionally named.
+  shape: [TensorDim];
+
+  /// The number of non-zero values in a sparse tensor.
+  length: long
+
+  /// Sparse tensor index
+  sparseIndex: SparseTensorIndex;
+
+  /// The location and size of the tensor's data
+  data: Buffer;
+}
+
+root_type SparseTensor;

From 1f16ffed817ac9fdcf598af81499e563c2db6d02 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 12 Sep 2018 09:22:36 +0900
Subject: [PATCH 02/40] Fix syntax error in SparseTensor.fbs

---
 format/SparseTensor.fbs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index 66ec098f41e..e4b1a777888 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -1,4 +1,4 @@
-include "Tensor.fbs"
+include "Tensor.fbs";
 
 namespace org.apache.arrow.flatbuf;
 
@@ -9,26 +9,26 @@ table SparseTensorIndexCOO {
   /// and M is the number of dimensions of a sparse tensor.
   /// indicesBuffer stores the location and size of this index matrix.
   /// The type of index value is long, so the stride for the index matrix is unnecessary.
-  indicesBuffer: Buffer
-};
+  indicesBuffer: Buffer;
+}
 
 /// Compressed Sparse Row format, that is matrix-specific.
 table SparseMatrixIndexCSR {
   /// This array represents the range of the rows.
   /// The ith row spans from indptr[i] to indptr[i+1] in the data.
   /// The length of this array is 1 + (the number of rows).
-  indptr: [long]
+  indptr: [long];
 
   /// indicesBuffer stores the location and size of the array that
   /// contains the column indices of the corresponding non-zero values.
   /// The type of index value is long.
-  indicesBuffer: Buffer
-};
+  indicesBuffer: Buffer;
+}
 
 union SparseTensorIndex {
   SparseTensorIndexCOO,
   SparseMatrixIndexCSR
-};
+}
 
 table SparseTensor {
   /// The type of data contained in a value cell.
@@ -40,7 +40,7 @@ table SparseTensor {
   shape: [TensorDim];
 
   /// The number of non-zero values in a sparse tensor.
-  length: long
+  length: long;
 
   /// Sparse tensor index
   sparseIndex: SparseTensorIndex;

From aa9b8a4d08e5a1e41a54643efc8ab26cb51f07a2 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 12 Sep 2018 09:36:02 +0900
Subject: [PATCH 03/40] Add SparseTensor.fbs in FBS_SRC

---
 cpp/src/arrow/ipc/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index 422e72e2eda..07e333b6edd 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -64,6 +64,7 @@ set(FBS_SRC
   ${CMAKE_SOURCE_DIR}/../format/File.fbs
   ${CMAKE_SOURCE_DIR}/../format/Schema.fbs
   ${CMAKE_SOURCE_DIR}/../format/Tensor.fbs
+  ${CMAKE_SOURCE_DIR}/../format/SparseTensor.fbs
   ${CMAKE_CURRENT_SOURCE_DIR}/feather.fbs)
 
 foreach(FIL ${FBS_SRC})

From 866b2c13ae6967c87a5db719ec0c829453a879c5 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 12 Sep 2018 09:58:31 +0900
Subject: [PATCH 04/40] Add header comments in SparseTensor.fbs

---
 format/SparseTensor.fbs | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index e4b1a777888..af2b68558ef 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -1,3 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/// EXPERIMENTAL: Metadata for n-dimensional sparse tensors, that contains
+/// only non-zero values.  Arrow implementations in general are not required
+/// to implement this type
+
 include "Tensor.fbs";
 
 namespace org.apache.arrow.flatbuf;

From d7e653f174bce7d622856de821a4d33a4a634ccb Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Mon, 17 Sep 2018 11:07:42 +0900
Subject: [PATCH 05/40] Add an example of COO format in comment

---
 format/SparseTensor.fbs | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index af2b68558ef..3e57126f389 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -30,6 +30,26 @@ table SparseTensorIndexCOO {
   /// and M is the number of dimensions of a sparse tensor.
   /// indicesBuffer stores the location and size of this index matrix.
   /// The type of index value is long, so the stride for the index matrix is unnecessary.
+  ///
+  /// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
+  ///
+  ///   X[0, 1, 2, 0] := 1
+  ///   X[1, 1, 2, 3] := 2
+  ///   X[0, 2, 1, 0] := 3
+  ///   X[0, 1, 3, 0] := 4
+  ///   X[0, 1, 2, 1] := 5
+  ///   X[1, 2, 0, 4] := 6
+  ///
+  /// In COO format, the index matrix of X is the following 6x4 matrix:
+  ///
+  ///   [[0, 1, 2, 0],
+  ///    [0, 1, 2, 1],
+  ///    [0, 1, 3, 0],
+  ///    [0, 2, 1, 0],
+  ///    [1, 1, 2, 3],
+  ///    [1, 2, 0, 4]]
+  ///
+  /// Note that the indices are sorted in lexicographical order.
   indicesBuffer: Buffer;
 }
 

From 76c56dd351632d4f77bce36ecfa25a8105aac3ed Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Mon, 17 Sep 2018 11:24:15 +0900
Subject: [PATCH 06/40] Make indptr of CSR a buffer

---
 format/SparseTensor.fbs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index 3e57126f389..670261999e4 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -55,10 +55,12 @@ table SparseTensorIndexCOO {
 
 /// Compressed Sparse Row format, that is matrix-specific.
 table SparseMatrixIndexCSR {
-  /// This array represents the range of the rows.
-  /// The ith row spans from indptr[i] to indptr[i+1] in the data.
-  /// The length of this array is 1 + (the number of rows).
-  indptr: [long];
+  /// indptrBuffer stores the location and size of indptr array that
+  /// represents the range of the rows.
+  /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
+  /// The length of this array is 1 + (the number of rows), and the type
+  /// of index value is long.
+  indptrBuffer: Buffer;
 
   /// indicesBuffer stores the location and size of the array that
   /// contains the column indices of the corresponding non-zero values.

From 2b50040f5ce3088fc1e2d4768982831f315c1a3a Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Mon, 17 Sep 2018 11:38:35 +0900
Subject: [PATCH 07/40] Add an example of the CSR format in comment

---
 format/SparseTensor.fbs | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index 670261999e4..59aa6aaad6e 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -60,11 +60,32 @@ table SparseMatrixIndexCSR {
   /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
   /// The length of this array is 1 + (the number of rows), and the type
   /// of index value is long.
+  ///
+  /// For example, let X be the following 6x4 matrix:
+  ///
+  ///   X := [[0, 1, 2, 0],
+  ///         [0, 0, 3, 0],
+  ///         [0, 4, 0, 5],
+  ///         [0, 0, 0, 0],
+  ///         [6, 0, 7, 8],
+  ///         [0, 9, 0, 0]].
+  ///
+  /// The array of non-zero values in X is:
+  ///
+  ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+  ///
+  /// And the indptr of X is:
+  ///
+  ///   indptr(X) = [0, 2, 3, 5, 5, 8, 9].
   indptrBuffer: Buffer;
 
   /// indicesBuffer stores the location and size of the array that
   /// contains the column indices of the corresponding non-zero values.
   /// The type of index value is long.
+  ///
+  /// For example, the indices of the above X are:
+  ///
+  ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
   indicesBuffer: Buffer;
 }
 

From c508db086c36f7b33075c302a4a13ab4028ca63c Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Mon, 24 Sep 2018 13:48:18 +0900
Subject: [PATCH 08/40] Write sparse tensor format in IPC.md

---
 docs/source/format/IPC.rst | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/docs/source/format/IPC.rst b/docs/source/format/IPC.rst
index 8cb74b87afc..43812f2895d 100644
--- a/docs/source/format/IPC.rst
+++ b/docs/source/format/IPC.rst
@@ -234,4 +234,27 @@ region) to be multiples of 64 bytes: ::
     <metadata>
     <tensor body>
 
+SparseTensor Message Format
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The ``SparseTensor`` message type provides another way, in addition to
+``Tensor``, to write a multidimensional array of fixed-size values using
+Arrow's shared memory tools. ``SparseTensor`` is designed specifically for
+tensors whose elements are mostly zero. As with ``Tensor``, Arrow
+implementations in general are not required to implement this data format.
+
+When writing a standalone encapsulated sparse tensor message, we use the format as
+indicated above, but additionally align the starting offset of the metadata as
+well as the starting offsets of the sparse index and the sparse tensor body
+(if writing to a shared memory region) to be multiples of 64 bytes: ::
+
+    <PADDING>
+    <metadata size: int32>
+    <metadata>
+    <sparse index>
+    <sparse tensor body>
+
+The contents of the sparse tensor index is depends on what kinds of sparse
+format is used.
+
 .. _Flatbuffer: https://github.com/google/flatbuffers

From b24f3c34292a249f3d66bc16dd6ec668bc31a13a Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Mon, 24 Sep 2018 14:24:06 +0900
Subject: [PATCH 09/40] Insert additional padding in sparse tensor format

---
 docs/source/format/IPC.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/format/IPC.rst b/docs/source/format/IPC.rst
index 43812f2895d..62a1237436a 100644
--- a/docs/source/format/IPC.rst
+++ b/docs/source/format/IPC.rst
@@ -252,6 +252,7 @@ well as the starting offsets of the sparse index and the sparse tensor body
     <metadata size: int32>
     <metadata>
     <sparse index>
+    <PADDING>
     <sparse tensor body>
 
 The contents of the sparse tensor index is depends on what kinds of sparse

From 392a25b7ceae5aa4cfd4477743f7b22d4564a8ed Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Mon, 1 Oct 2018 16:26:43 +0900
Subject: [PATCH 10/40] Implement SparseTensor and SparseCOOIndex

---
 cpp/src/arrow/CMakeLists.txt        |   2 +
 cpp/src/arrow/sparse_tensor-test.cc | 179 ++++++++++++++++
 cpp/src/arrow/sparse_tensor.cc      | 321 ++++++++++++++++++++++++++++
 cpp/src/arrow/sparse_tensor.h       | 121 +++++++++++
 cpp/src/arrow/tensor.h              |   6 +
 5 files changed, 629 insertions(+)
 create mode 100644 cpp/src/arrow/sparse_tensor-test.cc
 create mode 100644 cpp/src/arrow/sparse_tensor.cc
 create mode 100644 cpp/src/arrow/sparse_tensor.h

diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index f2a81124728..91bdce294c2 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -83,6 +83,7 @@ set(ARROW_SRCS
   table.cc
   table_builder.cc
   tensor.cc
+  sparse_tensor.cc
   type.cc
   visitor.cc
 
@@ -286,6 +287,7 @@ ADD_ARROW_TEST(type-test)
 ADD_ARROW_TEST(table-test)
 ADD_ARROW_TEST(table_builder-test)
 ADD_ARROW_TEST(tensor-test)
+ADD_ARROW_TEST(sparse_tensor-test)
 
 ADD_ARROW_BENCHMARK(builder-benchmark)
 ADD_ARROW_BENCHMARK(column-benchmark)
diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
new file mode 100644
index 00000000000..63ef2d11e77
--- /dev/null
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -0,0 +1,179 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Unit tests for SparseTensor and SparseCOOIndex
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <vector>
+
+#include <iostream>
+
+#include <gtest/gtest.h>
+
+#include "arrow/sparse_tensor.h"
+#include "arrow/test-util.h"
+#include "arrow/type.h"
+
+namespace arrow {
+
+TEST(TestSparseCOOTensor, CreationEmptyTensor) {
+  std::vector<int64_t> shape = {2, 3, 4};
+  SparseTensor<SparseCOOIndex> st1(int64(), shape);
+
+  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
+  SparseTensor<SparseCOOIndex> st2(int64(), shape, dim_names);
+
+  ASSERT_EQ(0, st1.length());
+  ASSERT_EQ(0, st2.length());
+
+  ASSERT_EQ(24, st1.size());
+  ASSERT_EQ(24, st2.size());
+
+  ASSERT_EQ("foo", st2.dim_name(0));
+  ASSERT_EQ("bar", st2.dim_name(1));
+  ASSERT_EQ("baz", st2.dim_name(2));
+
+  ASSERT_EQ("", st1.dim_name(0));
+  ASSERT_EQ("", st1.dim_name(1));
+  ASSERT_EQ("", st1.dim_name(2));
+}
+
+TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
+  std::vector<int64_t> shape = {2, 3, 4};
+  std::vector<int64_t> values = {1, 0,  2, 0,  0,  3, 0,  4, 5, 0,  6, 0,
+                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
+  std::shared_ptr<Buffer> buffer(Buffer::Wrap(values));
+  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
+  NumericTensor<Int64Type> tensor1(buffer, shape);
+  NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);
+  SparseTensor<SparseCOOIndex> st1(tensor1);
+  SparseTensor<SparseCOOIndex> st2(tensor2);
+
+  ASSERT_EQ(12, st1.length());
+  ASSERT_TRUE(st1.is_mutable());
+
+  ASSERT_EQ("foo", st2.dim_name(0));
+  ASSERT_EQ("bar", st2.dim_name(1));
+  ASSERT_EQ("baz", st2.dim_name(2));
+
+  ASSERT_EQ("", st1.dim_name(0));
+  ASSERT_EQ("", st1.dim_name(1));
+  ASSERT_EQ("", st1.dim_name(2));
+
+  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
+  for (int i = 0; i < 6; ++i) {
+    ASSERT_EQ(i + 1, ptr[i]);
+  }
+  for (int i = 0; i < 6; ++i) {
+    ASSERT_EQ(i + 11, ptr[i + 6]);
+  }
+
+  std::shared_ptr<SparseCOOIndex> si = st1.sparse_index();
+  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si->indices();
+  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
+  ASSERT_TRUE(sidx->is_column_major());
+
+  // (0, 0, 0) -> 1
+  ASSERT_EQ(0, sidx->Value({0, 0}));
+  ASSERT_EQ(0, sidx->Value({0, 1}));
+  ASSERT_EQ(0, sidx->Value({0, 2}));
+
+  // (0, 0, 2) -> 2
+  ASSERT_EQ(0, sidx->Value({1, 0}));
+  ASSERT_EQ(0, sidx->Value({1, 1}));
+  ASSERT_EQ(2, sidx->Value({1, 2}));
+
+  // (0, 1, 1) -> 3
+  ASSERT_EQ(0, sidx->Value({2, 0}));
+  ASSERT_EQ(1, sidx->Value({2, 1}));
+  ASSERT_EQ(1, sidx->Value({2, 2}));
+
+  // (1, 2, 1) -> 15
+  ASSERT_EQ(1, sidx->Value({10, 0}));
+  ASSERT_EQ(2, sidx->Value({10, 1}));
+  ASSERT_EQ(1, sidx->Value({10, 2}));
+
+  // (1, 2, 3) -> 16
+  ASSERT_EQ(1, sidx->Value({11, 0}));
+  ASSERT_EQ(2, sidx->Value({11, 1}));
+  ASSERT_EQ(3, sidx->Value({11, 2}));
+}
+
+TEST(TestSparseCOOTensor, CreationFromTensor) {
+  std::vector<int64_t> shape = {2, 3, 4};
+  std::vector<int64_t> values = {1, 0,  2, 0,  0,  3, 0,  4, 5, 0,  6, 0,
+                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
+  std::shared_ptr<Buffer> buffer(Buffer::Wrap(values));
+  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
+  Tensor tensor1(int64(), buffer, shape);
+  Tensor tensor2(int64(), buffer, shape, {}, dim_names);
+  SparseTensor<SparseCOOIndex> st1(tensor1);
+  SparseTensor<SparseCOOIndex> st2(tensor2);
+
+  ASSERT_EQ(12, st1.length());
+  ASSERT_TRUE(st1.is_mutable());
+
+  ASSERT_EQ("foo", st2.dim_name(0));
+  ASSERT_EQ("bar", st2.dim_name(1));
+  ASSERT_EQ("baz", st2.dim_name(2));
+
+  ASSERT_EQ("", st1.dim_name(0));
+  ASSERT_EQ("", st1.dim_name(1));
+  ASSERT_EQ("", st1.dim_name(2));
+
+  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
+  for (int i = 0; i < 6; ++i) {
+    ASSERT_EQ(i + 1, ptr[i]);
+  }
+  for (int i = 0; i < 6; ++i) {
+    ASSERT_EQ(i + 11, ptr[i + 6]);
+  }
+
+  std::shared_ptr<SparseCOOIndex> si = st1.sparse_index();
+  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si->indices();
+  ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
+  ASSERT_TRUE(sidx->is_column_major());
+
+  // (0, 0, 0) -> 1
+  ASSERT_EQ(0, sidx->Value({0, 0}));
+  ASSERT_EQ(0, sidx->Value({0, 1}));
+  ASSERT_EQ(0, sidx->Value({0, 2}));
+
+  // (0, 0, 2) -> 2
+  ASSERT_EQ(0, sidx->Value({1, 0}));
+  ASSERT_EQ(0, sidx->Value({1, 1}));
+  ASSERT_EQ(2, sidx->Value({1, 2}));
+
+  // (0, 1, 1) -> 3
+  ASSERT_EQ(0, sidx->Value({2, 0}));
+  ASSERT_EQ(1, sidx->Value({2, 1}));
+  ASSERT_EQ(1, sidx->Value({2, 2}));
+
+  // (1, 2, 1) -> 15
+  ASSERT_EQ(1, sidx->Value({10, 0}));
+  ASSERT_EQ(2, sidx->Value({10, 1}));
+  ASSERT_EQ(1, sidx->Value({10, 2}));
+
+  // (1, 2, 3) -> 16
+  ASSERT_EQ(1, sidx->Value({11, 0}));
+  ASSERT_EQ(2, sidx->Value({11, 1}));
+  ASSERT_EQ(3, sidx->Value({11, 2}));
+}
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
new file mode 100644
index 00000000000..ba7a2e82fd5
--- /dev/null
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -0,0 +1,321 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/sparse_tensor.h"
+
+#include <functional>
+#include <memory>
+#include <numeric>
+
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+namespace {
+
+template <typename T>
+struct SparseIndexTraits {};
+
+template <>
+struct SparseIndexTraits<SparseCOOIndex> {
+  static inline const char* name() { return "SparseCOOIndex"; }
+};
+
+template <typename TYPE, typename SparseIndexType>
+class SparseTensorConverter {
+ public:
+  explicit SparseTensorConverter(const NumericTensor<TYPE>&) {}
+
+  Status Convert() {
+    std::string sparse_index_name(SparseIndexTraits<SparseIndexType>::name());
+    return Status::NotImplemented(sparse_index_name +
+                                  std::string(" is not supported yet."));
+  }
+};
+
+template <typename TYPE>
+class SparseTensorConverter<TYPE, SparseCOOIndex> {
+ public:
+  using NumericTensorType = NumericTensor<TYPE>;
+  using value_type = typename NumericTensorType::value_type;
+
+  explicit SparseTensorConverter(const NumericTensor<TYPE>& tensor) : tensor_(tensor) {}
+
+  Status Convert() {
+    const int64_t ndim = tensor_.ndim();
+    const int64_t nonzero_count = static_cast<int64_t>(CountNonZero());
+    // Fills `sparse_index` (nonzero_count x ndim coordinates) and `data` (the values).
+    std::shared_ptr<Buffer> indices_buffer;
+    RETURN_NOT_OK(AllocateBuffer(sizeof(int64_t) * ndim * nonzero_count, &indices_buffer));
+    int64_t* indices = reinterpret_cast<int64_t*>(indices_buffer->mutable_data());
+
+    std::shared_ptr<Buffer> values_buffer;
+    RETURN_NOT_OK(AllocateBuffer(sizeof(value_type) * nonzero_count, &values_buffer));
+    value_type* values = reinterpret_cast<value_type*>(values_buffer->mutable_data());
+
+    if (ndim <= 1) {  // reads raw_data() linearly; strides are not consulted here
+      const value_type* data = reinterpret_cast<const value_type*>(tensor_.raw_data());
+      const int64_t count = ndim == 0 ? 1 : tensor_.shape()[0];
+      for (int64_t i = 0; i < count; ++i, ++data) {
+        if (*data != 0) {
+          if (ndim == 1) *indices++ = i;  // 0-d: the index buffer holds zero bytes
+          *values++ = *data;
+        }
+      }
+    } else {
+      const std::vector<int64_t>& shape = tensor_.shape();
+      std::vector<int64_t> coord(ndim, 0);
+      // Visit every element in row-major order, so coordinates are emitted sorted.
+      for (int64_t n = tensor_.size(); n > 0; n--) {
+        const value_type x = tensor_.Value(coord);
+        if (x != 0) {
+          *values++ = x;
+          // Column-major index matrix: dimension i of this entry goes to column i.
+          int64_t* indp = indices;
+          for (int64_t i = 0; i < ndim; ++i) {
+            *indp = coord[i];
+            indp += nonzero_count;
+          }
+          indices++;
+        }
+
+        // advance coord to the next element (the last dimension varies fastest)
+        ++coord[ndim - 1];
+        if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) {
+          int64_t d = ndim - 1;
+          while (d > 0 && coord[d] == shape[d]) {
+            coord[d] = 0;
+            ++coord[d - 1];
+            --d;
+          }
+        }
+      }
+    }
+
+    // wrap the buffers; strides {elsize, elsize * nonzero_count} describe column-major
+    const std::vector<int64_t> indices_shape = {nonzero_count, ndim};
+    const int64_t indices_elsize = sizeof(int64_t);
+    const std::vector<int64_t> indices_strides = {indices_elsize, indices_elsize * nonzero_count};
+    sparse_index = std::make_shared<SparseCOOIndex>(
+        std::make_shared<SparseCOOIndex::CoordsTensor>(indices_buffer,
+                                                       indices_shape,
+                                                       indices_strides));
+    data = values_buffer;
+
+    return Status::OK();
+  }
+
+  std::shared_ptr<SparseCOOIndex> sparse_index;
+  std::shared_ptr<Buffer> data;
+
+ protected:
+  bool TensorIsTriviallyIterable() const {
+    return tensor_.ndim() <= 1 || tensor_.is_contiguous();
+  }
+
+  size_t CountNonZero() const {
+    if (tensor_.size() == 0) {
+      return 0;
+    }
+
+    if (TensorIsTriviallyIterable()) {
+      const value_type* data = reinterpret_cast<const value_type*>(tensor_.raw_data());
+      return std::count_if(data, data + tensor_.size(), [](value_type x) { return x != 0; });
+    }
+
+    const std::vector<int64_t>& shape = tensor_.shape();
+    const int64_t ndim = tensor_.ndim();
+
+    size_t count = 0;
+    std::vector<int64_t> coord(ndim, 0);
+    for (int64_t n = tensor_.size(); n > 0; n--) {
+      if (tensor_.Value(coord) != 0) {
+        ++count;
+      }
+
+      // increment index
+      ++coord[ndim - 1];
+      if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) {
+        int64_t d = ndim - 1;
+        while (d > 0 && coord[d] == shape[d]) {
+          coord[d] = 0;
+          ++coord[d - 1];
+          --d;
+        }
+      }
+    }
+    return count;
+  }
+
+ private:
+  const NumericTensor<TYPE>& tensor_;
+};
+
+template <typename TYPE, typename SparseIndexType>
+void MakeSparseCOOTensorFromTensor(const Tensor& tensor,
+                                   std::shared_ptr<SparseIndexType>* sparse_index,
+                                   std::shared_ptr<Buffer>* data) {
+  NumericTensor<TYPE> numeric_tensor(tensor.data(), tensor.shape(), tensor.strides());
+  SparseTensorConverter<TYPE, SparseIndexType> converter(numeric_tensor);
+  DCHECK_OK(converter.Convert());
+  *sparse_index = converter.sparse_index;
+  *data = converter.data;
+}
+
+// ----------------------------------------------------------------------
+// Instantiate templates
+
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt8Type, SparseCOOIndex>;
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt16Type, SparseCOOIndex>;
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt32Type, SparseCOOIndex>;
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt64Type, SparseCOOIndex>;
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int8Type, SparseCOOIndex>;
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int16Type, SparseCOOIndex>;
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int32Type, SparseCOOIndex>;
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int64Type, SparseCOOIndex>;
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<HalfFloatType, SparseCOOIndex>;
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<FloatType, SparseCOOIndex>;
+template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<DoubleType, SparseCOOIndex>;
+
+}  // namespace
+
+// Constructor with a column-major NumericTensor
+SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords)
+    : SparseIndex(coords->shape()[0]), coords_(coords) {
+  DCHECK(coords_->is_column_major());
+}
+
+// Constructor with all attributes
+template <typename SparseIndexType>
+SparseTensor<SparseIndexType>::SparseTensor(
+    const std::shared_ptr<SparseIndexType>& sparse_index,
+    const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+    const std::vector<int64_t>& shape, const std::vector<std::string>& dim_names)
+    : type_(type),
+      data_(data),
+      shape_(shape),
+      sparse_index_(sparse_index),
+      dim_names_(dim_names) {
+  DCHECK(is_tensor_supported(type->id()));
+}
+
+// Constructor for an empty sparse tensor: no sparse index and no data buffer
+template <typename SparseIndexType>
+SparseTensor<SparseIndexType>::SparseTensor(const std::shared_ptr<DataType>& type,
+                                            const std::vector<int64_t>& shape,
+                                            const std::vector<std::string>& dim_names)
+    : SparseTensor(nullptr, type, nullptr, shape, dim_names) {}
+
+// Constructor with a dense tensor
+template <typename SparseIndexType>
+template <typename TYPE>
+SparseTensor<SparseIndexType>::SparseTensor(const NumericTensor<TYPE>& tensor)
+    : SparseTensor(nullptr, tensor.type(), nullptr, tensor.shape(), tensor.dim_names_) {
+  SparseTensorConverter<TYPE, SparseIndexType> converter(tensor);
+  DCHECK_OK(converter.Convert());
+  sparse_index_ = converter.sparse_index;
+  data_ = converter.data;
+}
+
+// Constructor with a dense tensor
+template <typename SparseIndexType>
+SparseTensor<SparseIndexType>::SparseTensor(const Tensor& tensor)
+    : SparseTensor(nullptr, tensor.type(), nullptr, tensor.shape(), tensor.dim_names_) {
+  switch (tensor.type()->id()) {
+    case Type::UINT8:
+      MakeSparseCOOTensorFromTensor<UInt8Type, SparseIndexType>(tensor, &sparse_index_,
+                                                                &data_);
+      return;
+    case Type::INT8:
+      MakeSparseCOOTensorFromTensor<Int8Type, SparseIndexType>(tensor, &sparse_index_,
+                                                               &data_);
+      return;
+    case Type::UINT16:
+      MakeSparseCOOTensorFromTensor<UInt16Type, SparseIndexType>(tensor, &sparse_index_,
+                                                                 &data_);
+      return;
+    case Type::INT16:
+      MakeSparseCOOTensorFromTensor<Int16Type, SparseIndexType>(tensor, &sparse_index_,
+                                                                &data_);
+      return;
+    case Type::UINT32:
+      MakeSparseCOOTensorFromTensor<UInt32Type, SparseIndexType>(tensor, &sparse_index_,
+                                                                 &data_);
+      return;
+    case Type::INT32:
+      MakeSparseCOOTensorFromTensor<Int32Type, SparseIndexType>(tensor, &sparse_index_,
+                                                                &data_);
+      return;
+    case Type::UINT64:
+      MakeSparseCOOTensorFromTensor<UInt64Type, SparseIndexType>(tensor, &sparse_index_,
+                                                                 &data_);
+      return;
+    case Type::INT64:
+      MakeSparseCOOTensorFromTensor<Int64Type, SparseIndexType>(tensor, &sparse_index_,
+                                                                &data_);
+      return;
+    case Type::HALF_FLOAT:
+      MakeSparseCOOTensorFromTensor<HalfFloatType, SparseIndexType>(
+          tensor, &sparse_index_, &data_);
+      return;
+    case Type::FLOAT:
+      MakeSparseCOOTensorFromTensor<FloatType, SparseIndexType>(tensor, &sparse_index_,
+                                                                &data_);
+      return;
+    case Type::DOUBLE:
+      MakeSparseCOOTensorFromTensor<DoubleType, SparseIndexType>(tensor, &sparse_index_,
+                                                                 &data_);
+      return;
+    default:
+      break;
+  }
+}
+
+template <typename SparseIndexType>
+const std::string& SparseTensor<SparseIndexType>::dim_name(int i) const {
+  static const std::string kEmpty = "";
+  if (dim_names_.size() == 0) {
+    return kEmpty;
+  } else {
+    DCHECK_LT(i, static_cast<int>(dim_names_.size()));
+    return dim_names_[i];
+  }
+}
+
+template <typename SparseIndexType>
+int64_t SparseTensor<SparseIndexType>::size() const {
+  return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies<int64_t>());
+}
+
+// ----------------------------------------------------------------------
+// Instantiate templates
+
+template class ARROW_TEMPLATE_EXPORT SparseTensor<SparseCOOIndex>;
+
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<UInt8Type>&);
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<UInt16Type>&);
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<UInt32Type>&);
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<UInt64Type>&);
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<Int8Type>&);
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<Int16Type>&);
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<Int32Type>&);
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<Int64Type>&);
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<HalfFloatType>&);
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<FloatType>&);
+template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<DoubleType>&);
+
+}  // namespace arrow
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
new file mode 100644
index 00000000000..b9b8bc96af0
--- /dev/null
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_SPARSE_TENSOR_H
+#define ARROW_SPARSE_TENSOR_H
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "arrow/tensor.h"
+
+namespace arrow {
+
+// ----------------------------------------------------------------------
+// SparseIndex class
+
+class ARROW_EXPORT SparseIndex {
+ public:
+  explicit SparseIndex(int64_t length) : length_(length) {}
+  int64_t length() const { return length_; }
+
+ protected:
+  int64_t length_;
+};
+
+// ----------------------------------------------------------------------
+// SparseCOOIndex class
+
+class ARROW_EXPORT SparseCOOIndex : public SparseIndex {
+ public:
+  using CoordsTensor = NumericTensor<Int64Type>;
+
+  virtual ~SparseCOOIndex() = default;
+
+  // Constructor with a column-major NumericTensor
+  explicit SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords);
+
+  const std::shared_ptr<CoordsTensor>& indices() const { return coords_; }
+
+ protected:
+  std::shared_ptr<CoordsTensor> coords_;
+};
+
+// ----------------------------------------------------------------------
+// SparseTensor class
+
+template <typename SparseIndexType>
+class ARROW_EXPORT SparseTensor {
+ public:
+  virtual ~SparseTensor() = default;
+
+  // Constructor with all attributes
+  SparseTensor(const std::shared_ptr<SparseIndexType>& sparse_index,
+               const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+               const std::vector<int64_t>& shape,
+               const std::vector<std::string>& dim_names);
+
+  // Constructor with a dense tensor
+  SparseTensor(const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+               const std::vector<std::string>& dim_names = {});
+
+  // Constructor with a dense numeric tensor
+  template <typename TYPE>
+  explicit SparseTensor(const NumericTensor<TYPE>& tensor);
+
+  // Constructor with a dense tensor
+  explicit SparseTensor(const Tensor& tensor);
+
+  std::shared_ptr<DataType> type() const { return type_; }
+  std::shared_ptr<Buffer> data() const { return data_; }
+
+  const uint8_t* raw_data() const { return data_->data(); }
+  uint8_t* raw_mutable_data() const { return data_->mutable_data(); }
+
+  const std::vector<int64_t>& shape() const { return shape_; }
+  const std::shared_ptr<SparseIndexType>& sparse_index() const { return sparse_index_; }
+
+  int ndim() const { return static_cast<int>(shape_.size()); }
+
+  const std::string& dim_name(int i) const;
+
+  /// Total number of non-zero cells in the sparse tensor
+  int64_t length() const { return sparse_index_ ? sparse_index_->length() : 0; }
+
+  /// Total number of value cells in the sparse tensor
+  int64_t size() const;
+
+  /// Return true if the underlying data buffer is mutable
+  bool is_mutable() const { return data_->is_mutable(); }
+
+ protected:
+  std::shared_ptr<DataType> type_;
+  std::shared_ptr<Buffer> data_;
+  std::vector<int64_t> shape_;
+  std::shared_ptr<SparseIndexType> sparse_index_;
+
+  /// These names are optional
+  std::vector<std::string> dim_names_;
+
+ private:
+  ARROW_DISALLOW_COPY_AND_ASSIGN(SparseTensor);
+};
+
+}  // namespace arrow
+
+#endif  // ARROW_SPARSE_TENSOR_H
diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
index a9b5df81fa1..e386b096037 100644
--- a/cpp/src/arrow/tensor.h
+++ b/cpp/src/arrow/tensor.h
@@ -50,6 +50,9 @@ static inline bool is_tensor_supported(Type::type type_id) {
   return false;
 }
 
+template <typename SparseIndexType>
+class SparseTensor;
+
 class ARROW_EXPORT Tensor {
  public:
   virtual ~Tensor() = default;
@@ -110,6 +113,9 @@ class ARROW_EXPORT Tensor {
   /// These names are optional
   std::vector<std::string> dim_names_;
 
+  template <typename SparseIndexType>
+  friend class SparseTensor;
+
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);
 };

From 433c9b4416ff0011eccfa21f8517eaab96f4d1fc Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 14 Nov 2018 17:51:40 +0900
Subject: [PATCH 11/40] Change COO index matrix to column-major in a format
 description

---
 format/SparseTensor.fbs | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
index 59aa6aaad6e..95666979bb5 100644
--- a/format/SparseTensor.fbs
+++ b/format/SparseTensor.fbs
@@ -40,14 +40,12 @@ table SparseTensorIndexCOO {
   ///   X[0, 1, 2, 1] := 5
   ///   X[1, 2, 0, 4] := 6
   ///
-  /// In COO format, the index matrix of X is the following 10x4 matrix:
+  /// In COO format, the index matrix of X is the following 4x6 matrix:
   ///
-  ///   [[0, 1, 2, 0],
-  ///    [0, 1, 2, 1],
-  ///    [0, 1, 3, 0],
-  ///    [0, 2, 1, 0],
-  ///    [1, 1, 2, 3],
-  ///    [1, 2, 0, 4]]
+  ///   [[0, 0, 0, 0, 1, 1],
+  ///    [1, 1, 1, 2, 1, 2],
+  ///    [2, 2, 3, 1, 2, 0],
+  ///    [0, 1, 0, 0, 3, 4]]
   ///
   /// Note that the indices are sorted in lexcographical order.
   indicesBuffer: Buffer;

From 4251b4d08eac6f1c91598d227f931d6690b97e7d Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Tue, 20 Nov 2018 21:16:49 +0900
Subject: [PATCH 12/40] Add SparseCSRIndex

---
 cpp/src/arrow/sparse_tensor-test.cc |  53 ++++++
 cpp/src/arrow/sparse_tensor.cc      | 273 ++++++++++++++++++++--------
 cpp/src/arrow/sparse_tensor.h       |  21 +++
 3 files changed, 273 insertions(+), 74 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index 63ef2d11e77..7705d380676 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -176,4 +176,57 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
   ASSERT_EQ(3, sidx->Value({11, 2}));
 }
 
+TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
+  std::vector<int64_t> shape = {6, 4};
+  std::vector<int64_t> values = {1, 0,  2, 0,  0,  3, 0,  4, 5, 0,  6, 0,
+                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
+  std::shared_ptr<Buffer> buffer(Buffer::Wrap(values));
+  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
+  NumericTensor<Int64Type> tensor1(buffer, shape);
+  NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);
+
+  SparseTensor<SparseCSRIndex> st1(tensor1);
+  SparseTensor<SparseCSRIndex> st2(tensor2);
+
+  ASSERT_EQ(12, st1.length());
+  ASSERT_TRUE(st1.is_mutable());
+
+  ASSERT_EQ("foo", st2.dim_name(0));
+  ASSERT_EQ("bar", st2.dim_name(1));
+  ASSERT_EQ("baz", st2.dim_name(2));
+
+  ASSERT_EQ("", st1.dim_name(0));
+  ASSERT_EQ("", st1.dim_name(1));
+  ASSERT_EQ("", st1.dim_name(2));
+
+  const int64_t* ptr = reinterpret_cast<const int64_t*>(st1.raw_data());
+  for (int i = 0; i < 6; ++i) {
+    ASSERT_EQ(i + 1, ptr[i]);
+  }
+  for (int i = 0; i < 6; ++i) {
+    ASSERT_EQ(i + 11, ptr[i + 6]);
+  }
+
+  std::shared_ptr<SparseCSRIndex> si = st1.sparse_index();
+
+  ASSERT_EQ(1, si->indptr()->ndim());
+  ASSERT_EQ(1, si->indices()->ndim());
+
+  const int64_t* indptr_begin =
+      reinterpret_cast<const int64_t*>(si->indptr()->raw_data());
+  std::vector<int64_t> indptr_values(indptr_begin,
+                                     indptr_begin + si->indptr()->shape()[0]);
+
+  ASSERT_EQ(7, indptr_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
+
+  const int64_t* indices_begin =
+      reinterpret_cast<const int64_t*>(si->indices()->raw_data());
+  std::vector<int64_t> indices_values(indices_begin,
+                                      indices_begin + si->indices()->shape()[0]);
+
+  ASSERT_EQ(12, indices_values.size());
+  ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index ba7a2e82fd5..533177aa111 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -35,6 +35,14 @@ struct SparseIndexTraits<SparseCOOIndex> {
   static inline const char* name() { return "SparseCOOIndex"; }
 };
 
+template <>
+struct SparseIndexTraits<SparseCSRIndex> {
+  static inline const char* name() { return "SparseCSRIndex"; }
+};
+
+// ----------------------------------------------------------------------
+// SparseTensorConverter
+
 template <typename TYPE, typename SparseIndexType>
 class SparseTensorConverter {
  public:
@@ -47,20 +55,75 @@ class SparseTensorConverter {
   }
 };
 
+// ----------------------------------------------------------------------
+// SparseTensorConverter for SparseCOOIndex
+
 template <typename TYPE>
-class SparseTensorConverter<TYPE, SparseCOOIndex> {
- public:
+struct SparseTensorConverterBase {
   using NumericTensorType = NumericTensor<TYPE>;
   using value_type = typename NumericTensorType::value_type;
 
-  explicit SparseTensorConverter(const NumericTensor<TYPE>& tensor) : tensor_(tensor) {}
+  explicit SparseTensorConverterBase(const NumericTensorType& tensor) : tensor_(tensor) {}
+
+  bool TensorIsTriviallyIterable() const {
+    return tensor_.ndim() <= 1 || tensor_.is_contiguous();
+  }
+
+  size_t CountNonZero() const {
+    if (tensor_.size() == 0) {
+      return 0;
+    }
+
+    if (TensorIsTriviallyIterable()) {
+      const value_type* data = reinterpret_cast<const value_type*>(tensor_.raw_data());
+      return std::count_if(data, data + tensor_.size(),
+                           [](value_type x) { return x != 0; });
+    }
+
+    const std::vector<int64_t>& shape = tensor_.shape();
+    const int64_t ndim = tensor_.ndim();
+
+    size_t count = 0;
+    std::vector<int64_t> coord(ndim, 0);
+    for (int64_t n = tensor_.size(); n > 0; n--) {
+      if (tensor_.Value(coord) != 0) {
+        ++count;
+      }
+
+      // increment index
+      ++coord[ndim - 1];
+      if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) {
+        int64_t d = ndim - 1;
+        while (d > 0 && coord[d] == shape[d]) {
+          coord[d] = 0;
+          ++coord[d - 1];
+          --d;
+        }
+      }
+    }
+    return count;
+  }
+
+  const NumericTensorType& tensor_;
+};
+
+template <typename TYPE>
+class SparseTensorConverter<TYPE, SparseCOOIndex>
+    : private SparseTensorConverterBase<TYPE> {
+ public:
+  using BaseClass = SparseTensorConverterBase<TYPE>;
+  using NumericTensorType = typename BaseClass::NumericTensorType;
+  using value_type = typename BaseClass::value_type;
+
+  explicit SparseTensorConverter(const NumericTensorType& tensor) : BaseClass(tensor) {}
 
   Status Convert() {
     const int64_t ndim = tensor_.ndim();
     const int64_t nonzero_count = static_cast<int64_t>(CountNonZero());
 
     std::shared_ptr<Buffer> indices_buffer;
-    RETURN_NOT_OK(AllocateBuffer(sizeof(int64_t) * ndim * nonzero_count, &indices_buffer));
+    RETURN_NOT_OK(
+        AllocateBuffer(sizeof(int64_t) * ndim * nonzero_count, &indices_buffer));
     int64_t* indices = reinterpret_cast<int64_t*>(indices_buffer->mutable_data());
 
     std::shared_ptr<Buffer> values_buffer;
@@ -85,7 +148,7 @@ class SparseTensorConverter<TYPE, SparseCOOIndex> {
         if (tensor_.Value(coord) != 0) {
           *values++ = x;
 
-          int64_t *indp = indices;
+          int64_t* indp = indices;
           for (int64_t i = 0; i < ndim; ++i) {
             *indp = coord[i];
             indp += nonzero_count;
@@ -109,11 +172,11 @@ class SparseTensorConverter<TYPE, SparseCOOIndex> {
     // make results
     const std::vector<int64_t> indices_shape = {nonzero_count, ndim};
     const int64_t indices_elsize = sizeof(int64_t);
-    const std::vector<int64_t> indices_strides = {indices_elsize, indices_elsize * nonzero_count};
-    sparse_index = std::make_shared<SparseCOOIndex>(
-        std::make_shared<SparseCOOIndex::CoordsTensor>(indices_buffer,
-                                                       indices_shape,
-                                                       indices_strides));
+    const std::vector<int64_t> indices_strides = {indices_elsize,
+                                                  indices_elsize * nonzero_count};
+    sparse_index =
+        std::make_shared<SparseCOOIndex>(std::make_shared<SparseCOOIndex::CoordsTensor>(
+            indices_buffer, indices_shape, indices_strides));
     data = values_buffer;
 
     return Status::OK();
@@ -122,47 +185,9 @@ class SparseTensorConverter<TYPE, SparseCOOIndex> {
   std::shared_ptr<SparseCOOIndex> sparse_index;
   std::shared_ptr<Buffer> data;
 
- protected:
-  bool TensorIsTriviallyIterable() const {
-    return tensor_.ndim() <= 1 || tensor_.is_contiguous();
-  }
-
-  size_t CountNonZero() const {
-    if (tensor_.size() == 0) {
-      return 0;
-    }
-
-    if (TensorIsTriviallyIterable()) {
-      const value_type* data = reinterpret_cast<const value_type*>(tensor_.raw_data());
-      return std::count_if(data, data + tensor_.size(), [](value_type x) { return x != 0; });
-    }
-
-    const std::vector<int64_t>& shape = tensor_.shape();
-    const int64_t ndim = tensor_.ndim();
-
-    size_t count = 0;
-    std::vector<int64_t> coord(ndim, 0);
-    for (int64_t n = tensor_.size(); n > 0; n--) {
-      if (tensor_.Value(coord) != 0) {
-        ++count;
-      }
-
-      // increment index
-      ++coord[ndim - 1];
-      if (n > 1 && coord[ndim - 1] == shape[ndim - 1]) {
-        int64_t d = ndim - 1;
-        while (d > 0 && coord[d] == shape[d]) {
-          coord[d] = 0;
-          ++coord[d - 1];
-          --d;
-        }
-      }
-    }
-    return count;
-  }
-
  private:
-  const NumericTensor<TYPE>& tensor_;
+  using SparseTensorConverterBase<TYPE>::tensor_;
+  using SparseTensorConverterBase<TYPE>::CountNonZero;
 };
 
 template <typename TYPE, typename SparseIndexType>
@@ -176,29 +201,126 @@ void MakeSparseCOOTensorFromTensor(const Tensor& tensor,
   *data = converter.data;
 }
 
+// ----------------------------------------------------------------------
+// SparseTensorConverter for SparseCSRIndex
+
+// CSR conversion of a dense matrix; only 2-D tensors are supported so far.
+template <typename TYPE>
+class SparseTensorConverter<TYPE, SparseCSRIndex>
+    : private SparseTensorConverterBase<TYPE> {
+ public:
+  using BaseClass = SparseTensorConverterBase<TYPE>;
+  using NumericTensorType = typename BaseClass::NumericTensorType;
+  using value_type = typename BaseClass::value_type;
+
+  explicit SparseTensorConverter(const NumericTensorType& tensor) : BaseClass(tensor) {}
+
+  // Fills sparse_index and data. Returns Invalid for ndim > 2 and NotImplemented
+  // for ndim <= 1; the rank is validated before shape()[0] / shape()[1] are read.
+  Status Convert() {
+    const int64_t ndim = tensor_.ndim();
+    if (ndim > 2) {
+      return Status::Invalid("Invalid tensor dimension");
+    }
+    if (ndim <= 1) {
+      return Status::NotImplemented("TODO for ndim <= 1");
+    }
+
+    const int64_t nr = tensor_.shape()[0];
+    const int64_t nc = tensor_.shape()[1];
+    const int64_t nonzero_count = static_cast<int64_t>(CountNonZero());
+
+    std::shared_ptr<Buffer> values_buffer;
+    RETURN_NOT_OK(AllocateBuffer(sizeof(value_type) * nonzero_count, &values_buffer));
+    value_type* values = reinterpret_cast<value_type*>(values_buffer->mutable_data());
+    std::shared_ptr<Buffer> indptr_buffer;
+    RETURN_NOT_OK(AllocateBuffer(sizeof(int64_t) * (nr + 1), &indptr_buffer));
+    int64_t* indptr = reinterpret_cast<int64_t*>(indptr_buffer->mutable_data());
+    std::shared_ptr<Buffer> indices_buffer;
+    RETURN_NOT_OK(AllocateBuffer(sizeof(int64_t) * nonzero_count, &indices_buffer));
+    int64_t* indices = reinterpret_cast<int64_t*>(indices_buffer->mutable_data());
+
+    // Scan row by row; after row i, the next indptr slot holds the running count.
+    int64_t k = 0;
+    *indptr++ = 0;
+    for (int64_t i = 0; i < nr; ++i) {
+      for (int64_t j = 0; j < nc; ++j) {
+        const value_type x = tensor_.Value({i, j});
+        if (x != 0) {
+          *values++ = x;
+          *indices++ = j;
+          k++;
+        }
+      }
+      *indptr++ = k;
+    }
+
+    std::vector<int64_t> indptr_shape({nr + 1});
+    std::shared_ptr<SparseCSRIndex::IndexTensor> indptr_tensor =
+        std::make_shared<SparseCSRIndex::IndexTensor>(indptr_buffer, indptr_shape);
+
+    std::vector<int64_t> indices_shape({nonzero_count});
+    std::shared_ptr<SparseCSRIndex::IndexTensor> indices_tensor =
+        std::make_shared<SparseCSRIndex::IndexTensor>(indices_buffer, indices_shape);
+
+    sparse_index = std::make_shared<SparseCSRIndex>(indptr_tensor, indices_tensor);
+    data = values_buffer;
+
+    return Status::OK();
+  }
+
+  std::shared_ptr<SparseCSRIndex> sparse_index;
+  std::shared_ptr<Buffer> data;
+
+ private:
+  using BaseClass::tensor_;
+  using SparseTensorConverterBase<TYPE>::CountNonZero;
+};
+
 // ----------------------------------------------------------------------
 // Instantiate templates
 
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt8Type, SparseCOOIndex>;
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt16Type, SparseCOOIndex>;
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt32Type, SparseCOOIndex>;
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt64Type, SparseCOOIndex>;
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int8Type, SparseCOOIndex>;
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int16Type, SparseCOOIndex>;
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int32Type, SparseCOOIndex>;
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int64Type, SparseCOOIndex>;
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<HalfFloatType, SparseCOOIndex>;
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<FloatType, SparseCOOIndex>;
-template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<DoubleType, SparseCOOIndex>;
+#define INSTANTIATE_SPARSE_TENSOR_CONVERTER(IndexType)                                  \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt8Type, IndexType>;     \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt16Type, IndexType>;    \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt32Type, IndexType>;    \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<UInt64Type, IndexType>;    \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int8Type, IndexType>;      \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int16Type, IndexType>;     \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int32Type, IndexType>;     \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<Int64Type, IndexType>;     \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<HalfFloatType, IndexType>; \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<FloatType, IndexType>;     \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorConverter<DoubleType, IndexType>
+
+INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCOOIndex);
+INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCSRIndex);
 
 }  // namespace
 
+// ----------------------------------------------------------------------
+// SparseCOOIndex
+
 // Constructor with a column-major NumericTensor
 SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords)
     : SparseIndex(coords->shape()[0]), coords_(coords) {
   DCHECK(coords_->is_column_major());
 }
 
+// ----------------------------------------------------------------------
+// SparseCSRIndex
+
+// Constructor with two index vectors
+SparseCSRIndex::SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr,
+                               const std::shared_ptr<IndexTensor>& indices)
+    : SparseIndex(indices->shape()[0]), indptr_(indptr), indices_(indices) {
+  DCHECK_EQ(1, indptr_->ndim());
+  DCHECK_EQ(1, indices_->ndim());
+}
+
+// ----------------------------------------------------------------------
+// SparseTensor
+
 // Constructor with all attributes
 template <typename SparseIndexType>
 SparseTensor<SparseIndexType>::SparseTensor(
@@ -304,18 +426,21 @@ int64_t SparseTensor<SparseIndexType>::size() const {
 // ----------------------------------------------------------------------
 // Instantiate templates
 
-template class ARROW_TEMPLATE_EXPORT SparseTensor<SparseCOOIndex>;
-
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<UInt8Type>&);
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<UInt16Type>&);
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<UInt32Type>&);
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<UInt64Type>&);
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<Int8Type>&);
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<Int16Type>&);
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<Int32Type>&);
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<Int64Type>&);
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<HalfFloatType>&);
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<FloatType>&);
-template SparseTensor<SparseCOOIndex>::SparseTensor(const NumericTensor<DoubleType>&);
+#define INSTANTIATE_SPARSE_TENSOR(IndexType)                                           \
+  template class ARROW_TEMPLATE_EXPORT SparseTensor<IndexType>;                        \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt8Type>&);     \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt16Type>&);    \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt32Type>&);    \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt64Type>&);    \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int8Type>&);      \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int16Type>&);     \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int32Type>&);     \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int64Type>&);     \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<HalfFloatType>&); \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<FloatType>&);     \
+  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<DoubleType>&)
+
+INSTANTIATE_SPARSE_TENSOR(SparseCOOIndex);
+INSTANTIATE_SPARSE_TENSOR(SparseCSRIndex);
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index b9b8bc96af0..be891007f26 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -56,6 +56,27 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndex {
   std::shared_ptr<CoordsTensor> coords_;
 };
 
+// ----------------------------------------------------------------------
+// SparseCSRIndex class
+
+class ARROW_EXPORT SparseCSRIndex : public SparseIndex {
+ public:
+  using IndexTensor = NumericTensor<Int64Type>;
+
+  virtual ~SparseCSRIndex() = default;
+
+  // Constructor with two index vectors
+  explicit SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr,
+                          const std::shared_ptr<IndexTensor>& indices);
+
+  const std::shared_ptr<IndexTensor>& indptr() const { return indptr_; }
+  const std::shared_ptr<IndexTensor>& indices() const { return indices_; }
+
+ protected:
+  std::shared_ptr<IndexTensor> indptr_;
+  std::shared_ptr<IndexTensor> indices_;
+};
+
 // ----------------------------------------------------------------------
 // SparseTensor class
 

From ed3984dd47e026a1ae171892b99601c6cadba8cd Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 5 Dec 2018 09:34:41 +0900
Subject: [PATCH 13/40] Add SparseIndex::format_type

---
 cpp/src/arrow/sparse_tensor.cc | 21 +++------------------
 cpp/src/arrow/sparse_tensor.h  | 15 ++++++++++++++-
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 533177aa111..e8562c487a2 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -27,19 +27,6 @@ namespace arrow {
 
 namespace {
 
-template <typename T>
-struct SparseIndexTraits {};
-
-template <>
-struct SparseIndexTraits<SparseCOOIndex> {
-  static inline const char* name() { return "SparseCOOIndex"; }
-};
-
-template <>
-struct SparseIndexTraits<SparseCSRIndex> {
-  static inline const char* name() { return "SparseCSRIndex"; }
-};
-
 // ----------------------------------------------------------------------
 // SparseTensorConverter
 
@@ -49,9 +36,7 @@ class SparseTensorConverter {
   explicit SparseTensorConverter(const NumericTensor<TYPE>&) {}
 
   Status Convert() {
-    std::string sparse_index_name(SparseIndexTraits<SparseIndexType>::name());
-    return Status::NotImplemented(sparse_index_name +
-                                  std::string(" is not supported yet."));
+    return Status::Invalid("Unsupported sparse index");
   }
 };
 
@@ -303,7 +288,7 @@ INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCSRIndex);
 
 // Constructor with a column-major NumericTensor
 SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords)
-    : SparseIndex(coords->shape()[0]), coords_(coords) {
+    : SparseIndex(SparseIndex::COO, coords->shape()[0]), coords_(coords) {
   DCHECK(coords_->is_column_major());
 }
 
@@ -313,7 +298,7 @@ SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords)
 // Constructor with two index vectors
 SparseCSRIndex::SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr,
                                const std::shared_ptr<IndexTensor>& indices)
-    : SparseIndex(indices->shape()[0]), indptr_(indptr), indices_(indices) {
+    : SparseIndex(SparseIndex::CSR, indices->shape()[0]), indptr_(indptr), indices_(indices) {
   DCHECK_EQ(1, indptr_->ndim());
   DCHECK_EQ(1, indices_->ndim());
 }
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index be891007f26..e5a3915a60d 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -31,10 +31,19 @@ namespace arrow {
 
 class ARROW_EXPORT SparseIndex {
  public:
-  explicit SparseIndex(int64_t length) : length_(length) {}
+  enum format_type {
+    COO,
+    CSR
+  };
+
+  explicit SparseIndex(format_type format_type_id, int64_t length)
+      : format_type_id_(format_type_id), length_(length) {}
+
+  format_type format_type_id() const { return format_type_id_; }
   int64_t length() const { return length_; }
 
  protected:
+  format_type format_type_id_;
   int64_t length_;
 };
 
@@ -45,6 +54,8 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndex {
  public:
   using CoordsTensor = NumericTensor<Int64Type>;
 
+  static constexpr SparseIndex::format_type format_type_id = SparseIndex::COO;
+
   virtual ~SparseCOOIndex() = default;
 
   // Constructor with a column-major NumericTensor
@@ -63,6 +74,8 @@ class ARROW_EXPORT SparseCSRIndex : public SparseIndex {
  public:
   using IndexTensor = NumericTensor<Int64Type>;
 
+  static constexpr SparseIndex::format_type format_type_id = SparseIndex::CSR;
+
   virtual ~SparseCSRIndex() = default;
 
   // Constructor with two index vectors

From 021b46be0d36bfca849a6838a23ac02cb4b6c828 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 5 Dec 2018 10:03:49 +0900
Subject: [PATCH 14/40] Add SparseTensorBase

---
 cpp/src/arrow/sparse_tensor-test.cc | 16 ++++-
 cpp/src/arrow/sparse_tensor.cc      | 77 ++++++++++++------------
 cpp/src/arrow/sparse_tensor.h       | 91 +++++++++++++++++++----------
 3 files changed, 111 insertions(+), 73 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index 7705d380676..d31538e7610 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -32,6 +32,12 @@
 
 namespace arrow {
 
+static inline void CheckSparseIndexFormatType(SparseIndex::format_type expected,
+                                              const SparseTensorBase& sparse_tensor) {
+  ASSERT_EQ(expected, sparse_tensor.sparse_index_format_type_id());
+  ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_type_id());
+}
+
 TEST(TestSparseCOOTensor, CreationEmptyTensor) {
   std::vector<int64_t> shape = {2, 3, 4};
   SparseTensor<SparseCOOIndex> st1(int64(), shape);
@@ -65,6 +71,8 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
   SparseTensor<SparseCOOIndex> st1(tensor1);
   SparseTensor<SparseCOOIndex> st2(tensor2);
 
+  CheckSparseIndexFormatType(SparseIndex::COO, st1);
+
   ASSERT_EQ(12, st1.length());
   ASSERT_TRUE(st1.is_mutable());
 
@@ -84,7 +92,7 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
     ASSERT_EQ(i + 11, ptr[i + 6]);
   }
 
-  std::shared_ptr<SparseCOOIndex> si = st1.sparse_index();
+  std::shared_ptr<SparseCOOIndex> si = std::dynamic_pointer_cast<SparseCOOIndex>(st1.sparse_index());
   std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si->indices();
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
@@ -145,7 +153,7 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
     ASSERT_EQ(i + 11, ptr[i + 6]);
   }
 
-  std::shared_ptr<SparseCOOIndex> si = st1.sparse_index();
+  std::shared_ptr<SparseCOOIndex> si = std::dynamic_pointer_cast<SparseCOOIndex>(st1.sparse_index());
   std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si->indices();
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
@@ -188,6 +196,8 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   SparseTensor<SparseCSRIndex> st1(tensor1);
   SparseTensor<SparseCSRIndex> st2(tensor2);
 
+  CheckSparseIndexFormatType(SparseIndex::CSR, st1);
+
   ASSERT_EQ(12, st1.length());
   ASSERT_TRUE(st1.is_mutable());
 
@@ -207,7 +217,7 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
     ASSERT_EQ(i + 11, ptr[i + 6]);
   }
 
-  std::shared_ptr<SparseCSRIndex> si = st1.sparse_index();
+  std::shared_ptr<SparseCSRIndex> si = std::dynamic_pointer_cast<SparseCSRIndex>(st1.sparse_index());
 
   ASSERT_EQ(1, si->indptr()->ndim());
   ASSERT_EQ(1, si->indices()->ndim());
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index e8562c487a2..0f437380bd4 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -176,9 +176,9 @@ class SparseTensorConverter<TYPE, SparseCOOIndex>
 };
 
 template <typename TYPE, typename SparseIndexType>
-void MakeSparseCOOTensorFromTensor(const Tensor& tensor,
-                                   std::shared_ptr<SparseIndexType>* sparse_index,
-                                   std::shared_ptr<Buffer>* data) {
+void MakeSparseTensorFromTensor(const Tensor& tensor,
+                                std::shared_ptr<SparseIndex>* sparse_index,
+                                std::shared_ptr<Buffer>* data) {
   NumericTensor<TYPE> numeric_tensor(tensor.data(), tensor.shape(), tensor.strides());
   SparseTensorConverter<TYPE, SparseIndexType> converter(numeric_tensor);
   DCHECK_OK(converter.Convert());
@@ -288,7 +288,7 @@ INSTANTIATE_SPARSE_TENSOR_CONVERTER(SparseCSRIndex);
 
 // Constructor with a column-major NumericTensor
 SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords)
-    : SparseIndex(SparseIndex::COO, coords->shape()[0]), coords_(coords) {
+    : SparseIndexBase(coords->shape()[0]), coords_(coords) {
   DCHECK(coords_->is_column_major());
 }
 
@@ -298,20 +298,20 @@ SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords)
 // Constructor with two index vectors
 SparseCSRIndex::SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr,
                                const std::shared_ptr<IndexTensor>& indices)
-    : SparseIndex(SparseIndex::CSR, indices->shape()[0]), indptr_(indptr), indices_(indices) {
+    : SparseIndexBase(indices->shape()[0]), indptr_(indptr), indices_(indices) {
   DCHECK_EQ(1, indptr_->ndim());
   DCHECK_EQ(1, indices_->ndim());
 }
 
 // ----------------------------------------------------------------------
-// SparseTensor
+// SparseTensorBase
 
 // Constructor with all attributes
-template <typename SparseIndexType>
-SparseTensor<SparseIndexType>::SparseTensor(
-    const std::shared_ptr<SparseIndexType>& sparse_index,
-    const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
-    const std::vector<int64_t>& shape, const std::vector<std::string>& dim_names)
+SparseTensorBase::SparseTensorBase(const std::shared_ptr<DataType>& type,
+                                   const std::shared_ptr<Buffer>& data,
+                                   const std::vector<int64_t>& shape,
+                                   const std::shared_ptr<SparseIndex>& sparse_index,
+                                   const std::vector<std::string>& dim_names)
     : type_(type),
       data_(data),
       shape_(shape),
@@ -320,6 +320,23 @@ SparseTensor<SparseIndexType>::SparseTensor(
   DCHECK(is_tensor_supported(type->id()));
 }
 
+const std::string& SparseTensorBase::dim_name(int i) const {
+  static const std::string kEmpty = "";
+  if (dim_names_.size() == 0) {
+    return kEmpty;
+  } else {
+    DCHECK_LT(i, static_cast<int>(dim_names_.size()));
+    return dim_names_[i];
+  }
+}
+
+int64_t SparseTensorBase::size() const {
+  return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies<int64_t>());
+}
+
+// ----------------------------------------------------------------------
+// SparseTensor
+
 // Constructor with a dense tensor
 template <typename SparseIndexType>
 SparseTensor<SparseIndexType>::SparseTensor(const std::shared_ptr<DataType>& type,
@@ -344,47 +361,47 @@ SparseTensor<SparseIndexType>::SparseTensor(const Tensor& tensor)
     : SparseTensor(nullptr, tensor.type(), nullptr, tensor.shape(), tensor.dim_names_) {
   switch (tensor.type()->id()) {
     case Type::UINT8:
-      MakeSparseCOOTensorFromTensor<UInt8Type, SparseIndexType>(tensor, &sparse_index_,
+      MakeSparseTensorFromTensor<UInt8Type, SparseIndexType>(tensor, &sparse_index_,
                                                                 &data_);
       return;
     case Type::INT8:
-      MakeSparseCOOTensorFromTensor<Int8Type, SparseIndexType>(tensor, &sparse_index_,
+      MakeSparseTensorFromTensor<Int8Type, SparseIndexType>(tensor, &sparse_index_,
                                                                &data_);
       return;
     case Type::UINT16:
-      MakeSparseCOOTensorFromTensor<UInt16Type, SparseIndexType>(tensor, &sparse_index_,
+      MakeSparseTensorFromTensor<UInt16Type, SparseIndexType>(tensor, &sparse_index_,
                                                                  &data_);
       return;
     case Type::INT16:
-      MakeSparseCOOTensorFromTensor<Int16Type, SparseIndexType>(tensor, &sparse_index_,
+      MakeSparseTensorFromTensor<Int16Type, SparseIndexType>(tensor, &sparse_index_,
                                                                 &data_);
       return;
     case Type::UINT32:
-      MakeSparseCOOTensorFromTensor<UInt32Type, SparseIndexType>(tensor, &sparse_index_,
+      MakeSparseTensorFromTensor<UInt32Type, SparseIndexType>(tensor, &sparse_index_,
                                                                  &data_);
       return;
     case Type::INT32:
-      MakeSparseCOOTensorFromTensor<Int32Type, SparseIndexType>(tensor, &sparse_index_,
+      MakeSparseTensorFromTensor<Int32Type, SparseIndexType>(tensor, &sparse_index_,
                                                                 &data_);
       return;
     case Type::UINT64:
-      MakeSparseCOOTensorFromTensor<UInt64Type, SparseIndexType>(tensor, &sparse_index_,
+      MakeSparseTensorFromTensor<UInt64Type, SparseIndexType>(tensor, &sparse_index_,
                                                                  &data_);
       return;
     case Type::INT64:
-      MakeSparseCOOTensorFromTensor<Int64Type, SparseIndexType>(tensor, &sparse_index_,
+      MakeSparseTensorFromTensor<Int64Type, SparseIndexType>(tensor, &sparse_index_,
                                                                 &data_);
       return;
     case Type::HALF_FLOAT:
-      MakeSparseCOOTensorFromTensor<HalfFloatType, SparseIndexType>(
+      MakeSparseTensorFromTensor<HalfFloatType, SparseIndexType>(
           tensor, &sparse_index_, &data_);
       return;
     case Type::FLOAT:
-      MakeSparseCOOTensorFromTensor<FloatType, SparseIndexType>(tensor, &sparse_index_,
+      MakeSparseTensorFromTensor<FloatType, SparseIndexType>(tensor, &sparse_index_,
                                                                 &data_);
       return;
     case Type::DOUBLE:
-      MakeSparseCOOTensorFromTensor<DoubleType, SparseIndexType>(tensor, &sparse_index_,
+      MakeSparseTensorFromTensor<DoubleType, SparseIndexType>(tensor, &sparse_index_,
                                                                  &data_);
       return;
     default:
@@ -392,22 +409,6 @@ SparseTensor<SparseIndexType>::SparseTensor(const Tensor& tensor)
   }
 }
 
-template <typename SparseIndexType>
-const std::string& SparseTensor<SparseIndexType>::dim_name(int i) const {
-  static const std::string kEmpty = "";
-  if (dim_names_.size() == 0) {
-    return kEmpty;
-  } else {
-    DCHECK_LT(i, static_cast<int>(dim_names_.size()));
-    return dim_names_[i];
-  }
-}
-
-template <typename SparseIndexType>
-int64_t SparseTensor<SparseIndexType>::size() const {
-  return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies<int64_t>());
-}
-
 // ----------------------------------------------------------------------
 // Instantiate templates
 
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index e5a3915a60d..51918073ed7 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -39,6 +39,8 @@ class ARROW_EXPORT SparseIndex {
   explicit SparseIndex(format_type format_type_id, int64_t length)
       : format_type_id_(format_type_id), length_(length) {}
 
+  virtual ~SparseIndex() = default;
+
   format_type format_type_id() const { return format_type_id_; }
   int64_t length() const { return length_; }
 
@@ -47,17 +49,22 @@ class ARROW_EXPORT SparseIndex {
   int64_t length_;
 };
 
+template <typename SparseIndexType>
+class SparseIndexBase : public SparseIndex {
+ public:
+  explicit SparseIndexBase(int64_t length)
+      : SparseIndex(SparseIndexType::format_type_id, length) {}
+};
+
 // ----------------------------------------------------------------------
 // SparseCOOIndex class
 
-class ARROW_EXPORT SparseCOOIndex : public SparseIndex {
+class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> {
  public:
   using CoordsTensor = NumericTensor<Int64Type>;
 
   static constexpr SparseIndex::format_type format_type_id = SparseIndex::COO;
 
-  virtual ~SparseCOOIndex() = default;
-
   // Constructor with a column-major NumericTensor
   explicit SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords);
 
@@ -70,13 +77,11 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndex {
 // ----------------------------------------------------------------------
 // SparseCSRIndex class
 
-class ARROW_EXPORT SparseCSRIndex : public SparseIndex {
+class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> {
  public:
   using IndexTensor = NumericTensor<Int64Type>;
 
-  static constexpr SparseIndex::format_type format_type_id = SparseIndex::COO;
-
-  virtual ~SparseCSRIndex() = default;
+  static constexpr SparseIndex::format_type format_type_id = SparseIndex::CSR;
 
   // Constructor with two index vectors
   explicit SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr,
@@ -91,29 +96,13 @@ class ARROW_EXPORT SparseCSRIndex : public SparseIndex {
 };
 
 // ----------------------------------------------------------------------
-// SparseTensor class
+// SparseTensorBase class
 
-template <typename SparseIndexType>
-class ARROW_EXPORT SparseTensor {
+class ARROW_EXPORT SparseTensorBase {
  public:
-  virtual ~SparseTensor() = default;
+  virtual ~SparseTensorBase() = default;
 
-  // Constructor with all attributes
-  SparseTensor(const std::shared_ptr<SparseIndexType>& sparse_index,
-               const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
-               const std::vector<int64_t>& shape,
-               const std::vector<std::string>& dim_names);
-
-  // Constructor with a dense tensor
-  SparseTensor(const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
-               const std::vector<std::string>& dim_names = {});
-
-  // Constructor with a dense numeric tensor
-  template <typename TYPE>
-  explicit SparseTensor(const NumericTensor<TYPE>& tensor);
-
-  // Constructor with a dense tensor
-  explicit SparseTensor(const Tensor& tensor);
+  virtual SparseIndex::format_type sparse_index_format_type_id() const = 0;
 
   std::shared_ptr<DataType> type() const { return type_; }
   std::shared_ptr<Buffer> data() const { return data_; }
@@ -122,29 +111,67 @@ class ARROW_EXPORT SparseTensor {
   uint8_t* raw_mutable_data() const { return data_->mutable_data(); }
 
   const std::vector<int64_t>& shape() const { return shape_; }
-  const std::shared_ptr<SparseIndexType>& sparse_index() const { return sparse_index_; }
+
+  const std::shared_ptr<SparseIndex>& sparse_index() const { return sparse_index_; }
 
   int ndim() const { return static_cast<int>(shape_.size()); }
 
   const std::string& dim_name(int i) const;
 
-  /// Total number of non-zero cells in the sparse tensor
-  int64_t length() const { return sparse_index_ ? sparse_index_->length() : 0; }
-
   /// Total number of value cells in the sparse tensor
   int64_t size() const;
 
   /// Return true if the underlying data buffer is mutable
   bool is_mutable() const { return data_->is_mutable(); }
 
+  /// Total number of non-zero cells in the sparse tensor
+  virtual int64_t length() const = 0;
+
  protected:
+  // Constructor with all attributes
+  SparseTensorBase(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+                   const std::vector<int64_t>& shape, const std::shared_ptr<SparseIndex>& sparse_index,
+                   const std::vector<std::string>& dim_names);
+
   std::shared_ptr<DataType> type_;
   std::shared_ptr<Buffer> data_;
   std::vector<int64_t> shape_;
-  std::shared_ptr<SparseIndexType> sparse_index_;
+  std::shared_ptr<SparseIndex> sparse_index_;
 
   /// These names are optional
   std::vector<std::string> dim_names_;
+};
+
+// ----------------------------------------------------------------------
+// SparseTensor class
+
+template <typename SparseIndexType>
+class ARROW_EXPORT SparseTensor : public SparseTensorBase {
+ public:
+  virtual ~SparseTensor() = default;
+
+  // Constructor with all attributes
+  SparseTensor(const std::shared_ptr<SparseIndexType>& sparse_index,
+               const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+               const std::vector<int64_t>& shape,
+               const std::vector<std::string>& dim_names)
+      : SparseTensorBase(type, data, shape, sparse_index, dim_names) {}
+
+  // Constructor for empty sparse tensor
+  SparseTensor(const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+               const std::vector<std::string>& dim_names = {});
+
+  // Constructor with a dense numeric tensor
+  template <typename TYPE>
+  explicit SparseTensor(const NumericTensor<TYPE>& tensor);
+
+  // Constructor with a dense tensor
+  explicit SparseTensor(const Tensor& tensor);
+
+  SparseIndex::format_type sparse_index_format_type_id() const { return SparseIndexType::format_type_id; }
+
+  /// Total number of non-zero cells in the sparse tensor
+  int64_t length() const { return sparse_index_ ? sparse_index_->length() : 0; }
 
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(SparseTensor);

From 93c03adad1c25d1b0570efc0aa6e9eecc0aff44c Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Thu, 6 Dec 2018 12:41:11 +0900
Subject: [PATCH 15/40] Add SparseIndex::ToString()

---
 cpp/src/arrow/sparse_tensor.cc | 4 ++++
 cpp/src/arrow/sparse_tensor.h  | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 0f437380bd4..d93696233e7 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -292,6 +292,8 @@ SparseCOOIndex::SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords)
   DCHECK(coords_->is_column_major());
 }
 
+std::string SparseCOOIndex::ToString() const { return std::string("SparseCOOIndex"); }
+
 // ----------------------------------------------------------------------
 // SparseCSRIndex
 
@@ -303,6 +305,8 @@ SparseCSRIndex::SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr,
   DCHECK_EQ(1, indices_->ndim());
 }
 
+std::string SparseCSRIndex::ToString() const { return std::string("SparseCSRIndex"); }
+
 // ----------------------------------------------------------------------
 // SparseTensorBase
 
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index 51918073ed7..a746f4d4621 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -44,6 +44,8 @@ class ARROW_EXPORT SparseIndex {
   format_type format_type_id() const { return format_type_id_; }
   int64_t length() const { return length_; }
 
+  virtual std::string ToString() const = 0;
+
  protected:
   format_type format_type_id_;
   int64_t length_;
@@ -70,6 +72,8 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> {
 
   const std::shared_ptr<CoordsTensor>& indices() const { return coords_; }
 
+  std::string ToString() const override;
+
  protected:
   std::shared_ptr<CoordsTensor> coords_;
 };
@@ -90,6 +94,8 @@ class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> {
   const std::shared_ptr<IndexTensor>& indptr() const { return indptr_; }
   const std::shared_ptr<IndexTensor>& indices() const { return indices_; }
 
+  std::string ToString() const override;
+
  protected:
   std::shared_ptr<IndexTensor> indptr_;
   std::shared_ptr<IndexTensor> indices_;

From 51a83bfee658ccf4f38b9f885ceb22223be4307b Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Fri, 7 Dec 2018 15:12:42 +0900
Subject: [PATCH 16/40] Add SparseTensorFormat

---
 cpp/src/arrow/sparse_tensor-test.cc  | 10 +++++-----
 cpp/src/arrow/sparse_tensor.h        | 25 +++++++++++--------------
 cpp/src/arrow/sparse_tensor_format.h | 28 ++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+), 19 deletions(-)
 create mode 100644 cpp/src/arrow/sparse_tensor_format.h

diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index d31538e7610..f4e7edabeeb 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -32,10 +32,10 @@
 
 namespace arrow {
 
-static inline void CheckSparseIndexFormatType(SparseIndex::format_type expected,
+static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected,
                                               const SparseTensorBase& sparse_tensor) {
-  ASSERT_EQ(expected, sparse_tensor.sparse_index_format_type_id());
-  ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_type_id());
+  ASSERT_EQ(expected, sparse_tensor.sparse_tensor_format_id());
+  ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id());
 }
 
 TEST(TestSparseCOOTensor, CreationEmptyTensor) {
@@ -71,7 +71,7 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
   SparseTensor<SparseCOOIndex> st1(tensor1);
   SparseTensor<SparseCOOIndex> st2(tensor2);
 
-  CheckSparseIndexFormatType(SparseIndex::COO, st1);
+  CheckSparseIndexFormatType(SparseTensorFormat::COO, st1);
 
   ASSERT_EQ(12, st1.length());
   ASSERT_TRUE(st1.is_mutable());
@@ -196,7 +196,7 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   SparseTensor<SparseCSRIndex> st1(tensor1);
   SparseTensor<SparseCSRIndex> st2(tensor2);
 
-  CheckSparseIndexFormatType(SparseIndex::CSR, st1);
+  CheckSparseIndexFormatType(SparseTensorFormat::CSR, st1);
 
   ASSERT_EQ(12, st1.length());
   ASSERT_TRUE(st1.is_mutable());
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index a746f4d4621..0a63ad1afd3 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -23,6 +23,7 @@
 #include <vector>
 
 #include "arrow/tensor.h"
+#include "arrow/sparse_tensor_format.h"
 
 namespace arrow {
 
@@ -31,23 +32,18 @@ namespace arrow {
 
 class ARROW_EXPORT SparseIndex {
  public:
-  enum format_type {
-    COO,
-    CSR
-  };
-
-  explicit SparseIndex(format_type format_type_id, int64_t length)
-      : format_type_id_(format_type_id), length_(length) {}
+  explicit SparseIndex(SparseTensorFormat::type format_id, int64_t length)
+      : format_id_(format_id), length_(length) {}
 
   virtual ~SparseIndex() = default;
 
-  format_type format_type_id() const { return format_type_id_; }
+  SparseTensorFormat::type format_id() const { return format_id_; }
   int64_t length() const { return length_; }
 
   virtual std::string ToString() const = 0;
 
  protected:
-  format_type format_type_id_;
+  SparseTensorFormat::type format_id_;
   int64_t length_;
 };
 
@@ -55,7 +51,7 @@ template <typename SparseIndexType>
 class SparseIndexBase : public SparseIndex {
  public:
   explicit SparseIndexBase(int64_t length)
-      : SparseIndex(SparseIndexType::format_type_id, length) {}
+      : SparseIndex(SparseIndexType::format_id, length) {}
 };
 
 // ----------------------------------------------------------------------
@@ -65,7 +61,7 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> {
  public:
   using CoordsTensor = NumericTensor<Int64Type>;
 
-  static constexpr SparseIndex::format_type format_type_id = SparseIndex::COO;
+  static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::COO;
 
   // Constructor with a column-major NumericTensor
   explicit SparseCOOIndex(const std::shared_ptr<CoordsTensor>& coords);
@@ -85,7 +81,7 @@ class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> {
  public:
   using IndexTensor = NumericTensor<Int64Type>;
 
-  static constexpr SparseIndex::format_type format_type_id = SparseIndex::CSR;
+  static constexpr SparseTensorFormat::type format_id = SparseTensorFormat::CSR;
 
   // Constructor with two index vectors
   explicit SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr,
@@ -108,7 +104,7 @@ class ARROW_EXPORT SparseTensorBase {
  public:
   virtual ~SparseTensorBase() = default;
 
-  virtual SparseIndex::format_type sparse_index_format_type_id() const = 0;
+  virtual SparseTensorFormat::type sparse_tensor_format_id() const = 0;
 
   std::shared_ptr<DataType> type() const { return type_; }
   std::shared_ptr<Buffer> data() const { return data_; }
@@ -146,6 +142,7 @@ class ARROW_EXPORT SparseTensorBase {
 
   /// These names are optional
   std::vector<std::string> dim_names_;
+
 };
 
 // ----------------------------------------------------------------------
@@ -174,7 +171,7 @@ class ARROW_EXPORT SparseTensor : public SparseTensorBase {
   // Constructor with a dense tensor
   explicit SparseTensor(const Tensor& tensor);
 
-  SparseIndex::format_type sparse_index_format_type_id() const { return SparseIndexType::format_type_id; }
+  SparseTensorFormat::type sparse_tensor_format_id() const { return SparseIndexType::format_id; }
 
   /// Total number of non-zero cells in the sparse tensor
   int64_t length() const { return sparse_index_ ? sparse_index_->length() : 0; }
diff --git a/cpp/src/arrow/sparse_tensor_format.h b/cpp/src/arrow/sparse_tensor_format.h
new file mode 100644
index 00000000000..42392d68ee7
--- /dev/null
+++ b/cpp/src/arrow/sparse_tensor_format.h
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef ARROW_SPARSE_TENSOR_FORMAT_H
+#define ARROW_SPARSE_TENSOR_FORMAT_H
+
+struct SparseTensorFormat {
+  enum type {
+    COO,
+    CSR
+  };
+};
+
+#endif  // ARROW_SPARSE_TENSOR_FORMAT_H

From 1d9042709d89d420b84332f1e278076b8aea98bb Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Fri, 7 Dec 2018 15:20:08 +0900
Subject: [PATCH 17/40] Fix format

---
 cpp/src/arrow/sparse_tensor-test.cc  |  9 ++++++---
 cpp/src/arrow/sparse_tensor.cc       | 28 +++++++++++++---------------
 cpp/src/arrow/sparse_tensor.h        | 12 +++++++-----
 cpp/src/arrow/sparse_tensor_format.h |  5 +----
 4 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index f4e7edabeeb..64778ca33b6 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -92,7 +92,8 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
     ASSERT_EQ(i + 11, ptr[i + 6]);
   }
 
-  std::shared_ptr<SparseCOOIndex> si = std::dynamic_pointer_cast<SparseCOOIndex>(st1.sparse_index());
+  std::shared_ptr<SparseCOOIndex> si =
+      std::dynamic_pointer_cast<SparseCOOIndex>(st1.sparse_index());
   std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si->indices();
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
@@ -153,7 +154,8 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
     ASSERT_EQ(i + 11, ptr[i + 6]);
   }
 
-  std::shared_ptr<SparseCOOIndex> si = std::dynamic_pointer_cast<SparseCOOIndex>(st1.sparse_index());
+  std::shared_ptr<SparseCOOIndex> si =
+      std::dynamic_pointer_cast<SparseCOOIndex>(st1.sparse_index());
   std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si->indices();
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
@@ -217,7 +219,8 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
     ASSERT_EQ(i + 11, ptr[i + 6]);
   }
 
-  std::shared_ptr<SparseCSRIndex> si = std::dynamic_pointer_cast<SparseCSRIndex>(st1.sparse_index());
+  std::shared_ptr<SparseCSRIndex> si =
+      std::dynamic_pointer_cast<SparseCSRIndex>(st1.sparse_index());
 
   ASSERT_EQ(1, si->indptr()->ndim());
   ASSERT_EQ(1, si->indices()->ndim());
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index d93696233e7..a8d96c3bd62 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -35,9 +35,7 @@ class SparseTensorConverter {
  public:
   explicit SparseTensorConverter(const NumericTensor<TYPE>&) {}
 
-  Status Convert() {
-    return Status::Invalid("Unsupported sparse index");
-  }
+  Status Convert() { return Status::Invalid("Unsupported sparse index"); }
 };
 
 // ----------------------------------------------------------------------
@@ -366,47 +364,47 @@ SparseTensor<SparseIndexType>::SparseTensor(const Tensor& tensor)
   switch (tensor.type()->id()) {
     case Type::UINT8:
       MakeSparseTensorFromTensor<UInt8Type, SparseIndexType>(tensor, &sparse_index_,
-                                                                &data_);
+                                                             &data_);
       return;
     case Type::INT8:
       MakeSparseTensorFromTensor<Int8Type, SparseIndexType>(tensor, &sparse_index_,
-                                                               &data_);
+                                                            &data_);
       return;
     case Type::UINT16:
       MakeSparseTensorFromTensor<UInt16Type, SparseIndexType>(tensor, &sparse_index_,
-                                                                 &data_);
+                                                              &data_);
       return;
     case Type::INT16:
       MakeSparseTensorFromTensor<Int16Type, SparseIndexType>(tensor, &sparse_index_,
-                                                                &data_);
+                                                             &data_);
       return;
     case Type::UINT32:
       MakeSparseTensorFromTensor<UInt32Type, SparseIndexType>(tensor, &sparse_index_,
-                                                                 &data_);
+                                                              &data_);
       return;
     case Type::INT32:
       MakeSparseTensorFromTensor<Int32Type, SparseIndexType>(tensor, &sparse_index_,
-                                                                &data_);
+                                                             &data_);
       return;
     case Type::UINT64:
       MakeSparseTensorFromTensor<UInt64Type, SparseIndexType>(tensor, &sparse_index_,
-                                                                 &data_);
+                                                              &data_);
       return;
     case Type::INT64:
       MakeSparseTensorFromTensor<Int64Type, SparseIndexType>(tensor, &sparse_index_,
-                                                                &data_);
+                                                             &data_);
       return;
     case Type::HALF_FLOAT:
-      MakeSparseTensorFromTensor<HalfFloatType, SparseIndexType>(
-          tensor, &sparse_index_, &data_);
+      MakeSparseTensorFromTensor<HalfFloatType, SparseIndexType>(tensor, &sparse_index_,
+                                                                 &data_);
       return;
     case Type::FLOAT:
       MakeSparseTensorFromTensor<FloatType, SparseIndexType>(tensor, &sparse_index_,
-                                                                &data_);
+                                                             &data_);
       return;
     case Type::DOUBLE:
       MakeSparseTensorFromTensor<DoubleType, SparseIndexType>(tensor, &sparse_index_,
-                                                                 &data_);
+                                                              &data_);
       return;
     default:
       break;
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index 0a63ad1afd3..cc55e734ec9 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -22,8 +22,8 @@
 #include <string>
 #include <vector>
 
-#include "arrow/tensor.h"
 #include "arrow/sparse_tensor_format.h"
+#include "arrow/tensor.h"
 
 namespace arrow {
 
@@ -131,8 +131,9 @@ class ARROW_EXPORT SparseTensorBase {
 
  protected:
   // Constructor with all attributes
-  SparseTensorBase(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
-                   const std::vector<int64_t>& shape, const std::shared_ptr<SparseIndex>& sparse_index,
+  SparseTensorBase(const std::shared_ptr<DataType>& type,
+                   const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+                   const std::shared_ptr<SparseIndex>& sparse_index,
                    const std::vector<std::string>& dim_names);
 
   std::shared_ptr<DataType> type_;
@@ -142,7 +143,6 @@ class ARROW_EXPORT SparseTensorBase {
 
   /// These names are optional
   std::vector<std::string> dim_names_;
-
 };
 
 // ----------------------------------------------------------------------
@@ -171,7 +171,9 @@ class ARROW_EXPORT SparseTensor : public SparseTensorBase {
   // Constructor with a dense tensor
   explicit SparseTensor(const Tensor& tensor);
 
-  SparseTensorFormat::type sparse_tensor_format_id() const { return SparseIndexType::format_id; }
+  SparseTensorFormat::type sparse_tensor_format_id() const {
+    return SparseIndexType::format_id;
+  }
 
   /// Total number of non-zero cells in the sparse tensor
   int64_t length() const { return sparse_index_ ? sparse_index_->length() : 0; }
diff --git a/cpp/src/arrow/sparse_tensor_format.h b/cpp/src/arrow/sparse_tensor_format.h
index 42392d68ee7..24c1a190f50 100644
--- a/cpp/src/arrow/sparse_tensor_format.h
+++ b/cpp/src/arrow/sparse_tensor_format.h
@@ -19,10 +19,7 @@
 #define ARROW_SPARSE_TENSOR_FORMAT_H
 
 struct SparseTensorFormat {
-  enum type {
-    COO,
-    CSR
-  };
+  enum type { COO, CSR };
 };
 
 #endif  // ARROW_SPARSE_TENSOR_FORMAT_H

From 6bc9e296f5f213b579c55a35875b46eb5f85d454 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Fri, 7 Dec 2018 15:13:54 +0900
Subject: [PATCH 18/40] Support IPC read and write of SparseTensor

---
 cpp/src/arrow/ipc/message.cc           |   2 +
 cpp/src/arrow/ipc/message.h            |   2 +-
 cpp/src/arrow/ipc/metadata-internal.cc | 128 +++++++++++++++++++++++++
 cpp/src/arrow/ipc/metadata-internal.h  |  11 +++
 cpp/src/arrow/ipc/read-write-test.cc   | 110 +++++++++++++++++++++
 cpp/src/arrow/ipc/reader.cc            | 101 +++++++++++++++++++
 cpp/src/arrow/ipc/reader.h             |  25 +++++
 cpp/src/arrow/ipc/writer.cc            | 101 +++++++++++++++++++
 cpp/src/arrow/ipc/writer.h             |  15 +++
 9 files changed, 494 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc
index 8adf4a8b660..23709a46192 100644
--- a/cpp/src/arrow/ipc/message.cc
+++ b/cpp/src/arrow/ipc/message.cc
@@ -63,6 +63,8 @@ class Message::MessageImpl {
         return Message::RECORD_BATCH;
       case flatbuf::MessageHeader_Tensor:
         return Message::TENSOR;
+      case flatbuf::MessageHeader_SparseTensor:
+        return Message::SPARSE_TENSOR;
       default:
         return Message::NONE;
     }
diff --git a/cpp/src/arrow/ipc/message.h b/cpp/src/arrow/ipc/message.h
index 092a19ff9a0..760012d1a68 100644
--- a/cpp/src/arrow/ipc/message.h
+++ b/cpp/src/arrow/ipc/message.h
@@ -70,7 +70,7 @@ constexpr int kMaxNestingDepth = 64;
 /// \brief An IPC message including metadata and body
 class ARROW_EXPORT Message {
  public:
-  enum Type { NONE, SCHEMA, DICTIONARY_BATCH, RECORD_BATCH, TENSOR };
+  enum Type { NONE, SCHEMA, DICTIONARY_BATCH, RECORD_BATCH, TENSOR, SPARSE_TENSOR };
 
   /// \brief Construct message, but do not validate
   ///
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 1d4c80c2946..a644f434c24 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -29,10 +29,12 @@
 #include "arrow/ipc/File_generated.h"  // IWYU pragma: keep
 #include "arrow/ipc/Message_generated.h"
 #include "arrow/ipc/Tensor_generated.h"  // IWYU pragma: keep
+#include "arrow/ipc/SparseTensor_generated.h"
 #include "arrow/ipc/message.h"
 #include "arrow/ipc/util.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
@@ -50,6 +52,7 @@ using DictionaryOffset = flatbuffers::Offset<flatbuf::DictionaryEncoding>;
 using FieldOffset = flatbuffers::Offset<flatbuf::Field>;
 using KeyValueOffset = flatbuffers::Offset<flatbuf::KeyValue>;
 using RecordBatchOffset = flatbuffers::Offset<flatbuf::RecordBatch>;
+using SparseTensorOffset = flatbuffers::Offset<flatbuf::SparseTensor>;
 using Offset = flatbuffers::Offset<void>;
 using FBString = flatbuffers::Offset<flatbuffers::String>;
 
@@ -781,6 +784,85 @@ Status WriteTensorMessage(const Tensor& tensor, int64_t buffer_start_offset,
                         body_length, out);
 }
 
+Status MakeSparseTensorIndexCOO(FBB& fbb, const SparseCOOIndex& sparse_index, const std::vector<BufferMetadata>& buffers, flatbuf::SparseTensorIndex* fb_sparse_index_type, Offset* fb_sparse_index, size_t* num_buffers) {
+  *fb_sparse_index_type = flatbuf::SparseTensorIndex_SparseTensorIndexCOO;
+  const BufferMetadata& indices_metadata = buffers[0];
+  flatbuf::Buffer indices(indices_metadata.offset, indices_metadata.length);
+  *fb_sparse_index = flatbuf::CreateSparseTensorIndexCOO(fbb, &indices).Union();
+  *num_buffers = 1;
+  return Status::OK();
+}
+
+Status MakeSparseMatrixIndexCSR(FBB& fbb, const SparseCSRIndex& sparse_index, const std::vector<BufferMetadata>& buffers, flatbuf::SparseTensorIndex* fb_sparse_index_type, Offset* fb_sparse_index, size_t* num_buffers) {
+  *fb_sparse_index_type = flatbuf::SparseTensorIndex_SparseMatrixIndexCSR;
+  const BufferMetadata& indptr_metadata = buffers[0];
+  const BufferMetadata& indices_metadata = buffers[1];
+  flatbuf::Buffer indptr(indptr_metadata.offset, indptr_metadata.length);
+  flatbuf::Buffer indices(indices_metadata.offset, indices_metadata.length);
+  *fb_sparse_index = flatbuf::CreateSparseMatrixIndexCSR(fbb, &indptr, &indices).Union();
+  *num_buffers = 2;
+  return Status::OK();
+}
+
+Status MakeSparseTensorIndex(FBB& fbb, const SparseIndex& sparse_index, const std::vector<BufferMetadata>& buffers, flatbuf::SparseTensorIndex* fb_sparse_index_type, Offset* fb_sparse_index, size_t* num_buffers) {
+  switch (sparse_index.format_id()) {
+    case SparseTensorFormat::COO:
+      RETURN_NOT_OK(MakeSparseTensorIndexCOO(fbb, checked_cast<const SparseCOOIndex&>(sparse_index), buffers, fb_sparse_index_type, fb_sparse_index, num_buffers));
+      break;
+
+    case SparseTensorFormat::CSR:
+      RETURN_NOT_OK(MakeSparseMatrixIndexCSR(fbb, checked_cast<const SparseCSRIndex&>(sparse_index), buffers, fb_sparse_index_type, fb_sparse_index, num_buffers));
+      break;
+
+    default:
+      std::stringstream ss;
+      ss << "Unsupporoted sparse tensor format:: " << sparse_index.ToString() << std::endl;
+      return Status::NotImplemented(ss.str());
+  }
+
+  return Status::OK();
+}
+
+Status MakeSparseTensor(FBB& fbb, const SparseTensorBase& sparse_tensor, int64_t body_length,
+                        const std::vector<BufferMetadata>& buffers,
+                        SparseTensorOffset* offset) {
+  flatbuf::Type fb_type_type;
+  Offset fb_type;
+  RETURN_NOT_OK(TensorTypeToFlatbuffer(fbb, *sparse_tensor.type(), &fb_type_type, &fb_type));
+
+  using TensorDimOffset = flatbuffers::Offset<flatbuf::TensorDim>;
+  std::vector<TensorDimOffset> dims;
+  for (int i = 0; i < sparse_tensor.ndim(); ++i) {
+    FBString name = fbb.CreateString(sparse_tensor.dim_name(i));
+    dims.push_back(flatbuf::CreateTensorDim(fbb, sparse_tensor.shape()[i], name));
+  }
+
+  auto fb_shape = fbb.CreateVector(dims);
+
+  flatbuf::SparseTensorIndex fb_sparse_index_type;
+  Offset fb_sparse_index;
+  size_t num_index_buffers = 0;
+  RETURN_NOT_OK(MakeSparseTensorIndex(fbb, *sparse_tensor.sparse_index(), buffers, &fb_sparse_index_type, &fb_sparse_index, &num_index_buffers));
+
+  const BufferMetadata& data_metadata = buffers[num_index_buffers];
+  flatbuf::Buffer data(data_metadata.offset, data_metadata.length);
+
+  int64_t length = sparse_tensor.length();
+
+  *offset = flatbuf::CreateSparseTensor(fbb, fb_type_type, fb_type, fb_shape, length, fb_sparse_index_type, fb_sparse_index, &data);
+
+  return Status::OK();
+}
+
+Status WriteSparseTensorMessage(const SparseTensorBase& sparse_tensor, int64_t body_length,
+                                const std::vector<BufferMetadata>& buffers,
+                                std::shared_ptr<Buffer>* out) {
+  FBB fbb;
+  SparseTensorOffset fb_sparse_tensor;
+  RETURN_NOT_OK(MakeSparseTensor(fbb, sparse_tensor, body_length, buffers, &fb_sparse_tensor));
+  return WriteFBMessage(fbb, flatbuf::MessageHeader_SparseTensor, fb_sparse_tensor.Union(), body_length, out);
+}
+
 Status WriteDictionaryMessage(int64_t id, int64_t length, int64_t body_length,
                               const std::vector<FieldMetadata>& nodes,
                               const std::vector<BufferMetadata>& buffers,
@@ -933,6 +1015,52 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
   return TypeFromFlatbuffer(tensor->type_type(), tensor->type(), {}, type);
 }
 
+Status GetSparseTensorMetadata(const Buffer& metadata,
+                               std::shared_ptr<DataType>* type, std::vector<int64_t>* shape,
+                               std::vector<std::string>* dim_names, int64_t* length,
+                               SparseTensorFormat::type* sparse_tensor_format_id)
+{
+  auto message = flatbuf::GetMessage(metadata.data());
+  if (message->header_type() != flatbuf::MessageHeader_SparseTensor) {
+    DCHECK_EQ(message->header_type(), flatbuf::MessageHeader_SparseTensor);
+  }
+  if (message->header() == nullptr) {
+    return Status::IOError("Header-pointer of flatbuffer-encoded Message is null.");
+  }
+
+  auto sparse_tensor = reinterpret_cast<const flatbuf::SparseTensor*>(message->header());
+  int ndim = static_cast<int>(sparse_tensor->shape()->size());
+
+  for (int i = 0; i < ndim; ++i) {
+    auto dim = sparse_tensor->shape()->Get(i);
+
+    shape->push_back(dim->size());
+    auto fb_name = dim->name();
+    if (fb_name == 0) {
+      dim_names->push_back("");
+    } else {
+      dim_names->push_back(fb_name->str());
+    }
+  }
+
+  *length = sparse_tensor->length();
+
+  switch (sparse_tensor->sparseIndex_type()) {
+    case flatbuf::SparseTensorIndex_SparseTensorIndexCOO:
+      *sparse_tensor_format_id = SparseTensorFormat::COO;
+      break;
+
+    case flatbuf::SparseTensorIndex_SparseMatrixIndexCSR:
+      *sparse_tensor_format_id = SparseTensorFormat::CSR;
+      break;
+
+    default:
+      return Status::Invalid("Unrecognized sparse index type");
+  }
+
+  return TypeFromFlatbuffer(sparse_tensor->type_type(), sparse_tensor->type(), {}, type);
+}
+
 // ----------------------------------------------------------------------
 // Implement message writing
 
diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h
index 152ca1367ec..621a9c66bf5 100644
--- a/cpp/src/arrow/ipc/metadata-internal.h
+++ b/cpp/src/arrow/ipc/metadata-internal.h
@@ -33,6 +33,7 @@
 #include "arrow/ipc/dictionary.h"  // IYWU pragma: keep
 #include "arrow/ipc/message.h"
 #include "arrow/memory_pool.h"
+#include "arrow/sparse_tensor_format.h"
 #include "arrow/status.h"
 
 namespace arrow {
@@ -40,6 +41,7 @@ namespace arrow {
 class DataType;
 class Schema;
 class Tensor;
+class SparseTensorBase;
 
 namespace flatbuf = org::apache::arrow::flatbuf;
 
@@ -103,6 +105,11 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
                          std::vector<int64_t>* shape, std::vector<int64_t>* strides,
                          std::vector<std::string>* dim_names);
 
+Status GetSparseTensorMetadata(const Buffer& metadata,
+                               std::shared_ptr<DataType>* type, std::vector<int64_t>* shape,
+                               std::vector<std::string>* dim_names, int64_t* length,
+                               SparseTensorFormat::type* sparse_tensor_format_id);
+
 /// Write a serialized message metadata with a length-prefix and padding to an
 /// 8-byte offset. Does not make assumptions about whether the stream is
 /// aligned already
@@ -137,6 +144,10 @@ Status WriteRecordBatchMessage(const int64_t length, const int64_t body_length,
 Status WriteTensorMessage(const Tensor& tensor, const int64_t buffer_start_offset,
                           std::shared_ptr<Buffer>* out);
 
+Status WriteSparseTensorMessage(const SparseTensorBase& sparse_tensor, int64_t body_length,
+                                const std::vector<BufferMetadata>& buffers,
+                                std::shared_ptr<Buffer>* out);
+
 Status WriteFileFooter(const Schema& schema, const std::vector<FileBlock>& dictionaries,
                        const std::vector<FileBlock>& record_batches,
                        DictionaryMemo* dictionary_memo, io::OutputStream* out);
diff --git a/cpp/src/arrow/ipc/read-write-test.cc b/cpp/src/arrow/ipc/read-write-test.cc
index 3a723badf37..14055013247 100644
--- a/cpp/src/arrow/ipc/read-write-test.cc
+++ b/cpp/src/arrow/ipc/read-write-test.cc
@@ -40,6 +40,7 @@
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
 #include "arrow/util/bit-util.h"
@@ -844,6 +845,115 @@ TEST_F(TestTensorRoundTrip, NonContiguous) {
   CheckTensorRoundTrip(tensor);
 }
 
+class TestSparseTensorRoundTrip : public ::testing::Test, public IpcTestFixture {
+ public:
+  void SetUp() { pool_ = default_memory_pool(); }
+  void TearDown() { io::MemoryMapFixture::TearDown(); }
+
+  template <typename SparseIndexType>
+  void CheckSparseTensorRoundTrip(const SparseTensor<SparseIndexType>& tensor) {
+    GTEST_FAIL();
+  }
+};
+
+template <>
+void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCOOIndex>(
+    const SparseTensor<SparseCOOIndex>& tensor) {
+
+  const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
+  const int elem_size = type.bit_width() / 8;
+
+  int32_t metadata_length;
+  int64_t body_length;
+
+  ASSERT_OK(mmap_->Seek(0));
+
+  ASSERT_OK(WriteSparseTensor(tensor, mmap_.get(), &metadata_length, &body_length, default_memory_pool()));
+
+  const auto& sparse_index = checked_cast<const SparseCOOIndex&>(*tensor.sparse_index());
+  const int64_t indices_length = elem_size * sparse_index.indices()->size();
+  const int64_t data_length = elem_size * tensor.length();
+  const int64_t expected_body_length = indices_length + data_length;
+  ASSERT_EQ(expected_body_length, body_length);
+
+  ASSERT_OK(mmap_->Seek(0));
+
+  std::shared_ptr<SparseTensorBase> result;
+  ASSERT_OK(ReadSparseTensor(mmap_.get(), &result));
+
+  const auto& resulted_sparse_index = checked_cast<const SparseCOOIndex&>(*result->sparse_index());
+  ASSERT_EQ(resulted_sparse_index.indices()->data()->size(), indices_length);
+  ASSERT_EQ(result->data()->size(), data_length);
+  // TODO ASSERT_TRUE(sparse_tensor.Equals(*result));
+}
+
+template <>
+void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCSRIndex>(
+    const SparseTensor<SparseCSRIndex>& tensor) {
+
+  const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
+  const int elem_size = type.bit_width() / 8;
+
+  int32_t metadata_length;
+  int64_t body_length;
+
+  ASSERT_OK(mmap_->Seek(0));
+
+  ASSERT_OK(WriteSparseTensor(tensor, mmap_.get(), &metadata_length, &body_length, default_memory_pool()));
+
+  const auto& sparse_index = checked_cast<const SparseCSRIndex&>(*tensor.sparse_index());
+  const int64_t indptr_length = elem_size * sparse_index.indptr()->size();
+  const int64_t indices_length = elem_size * sparse_index.indices()->size();
+  const int64_t data_length = elem_size * tensor.length();
+  const int64_t expected_body_length = indptr_length + indices_length + data_length;
+  ASSERT_EQ(expected_body_length, body_length);
+
+  ASSERT_OK(mmap_->Seek(0));
+
+  std::shared_ptr<SparseTensorBase> result;
+  ASSERT_OK(ReadSparseTensor(mmap_.get(), &result));
+
+  const auto& resulted_sparse_index = checked_cast<const SparseCSRIndex&>(*result->sparse_index());
+  ASSERT_EQ(resulted_sparse_index.indptr()->data()->size(), indptr_length);
+  ASSERT_EQ(resulted_sparse_index.indices()->data()->size(), indices_length);
+  ASSERT_EQ(result->data()->size(), data_length);
+  // TODO ASSERT_TRUE(sparse_tensor.Equals(*result));
+}
+
+TEST_F(TestSparseTensorRoundTrip, WithSparseCOOIndex) {
+  std::string path = "test-write-sparse-coo-tensor";
+  constexpr int64_t kBufferSize = 1 << 20;
+  ASSERT_OK(io::MemoryMapFixture::InitMemoryMap(kBufferSize, path, &mmap_));
+
+  std::vector<int64_t> shape = {2, 3, 4};
+  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
+  std::vector<int64_t> values = {1, 0,  2, 0,  0,  3, 0,  4, 5, 0,  6, 0,
+                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
+
+  auto data = Buffer::Wrap(values);
+  NumericTensor<Int64Type> t(data, shape, {}, dim_names);
+  SparseTensor<SparseCOOIndex> st(t);
+
+  CheckSparseTensorRoundTrip(st);
+}
+
+TEST_F(TestSparseTensorRoundTrip, WithSparseCSRIndex) {
+  std::string path = "test-write-sparse-csr-matrix";
+  constexpr int64_t kBufferSize = 1 << 20;
+  ASSERT_OK(io::MemoryMapFixture::InitMemoryMap(kBufferSize, path, &mmap_));
+
+  std::vector<int64_t> shape = {4, 6};
+  std::vector<std::string> dim_names = {"foo", "bar", "baz"};
+  std::vector<int64_t> values = {1, 0,  2, 0,  0,  3, 0,  4, 5, 0,  6, 0,
+                                 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
+
+  auto data = Buffer::Wrap(values);
+  NumericTensor<Int64Type> t(data, shape, {}, dim_names);
+  SparseTensor<SparseCSRIndex> st(t);
+
+  CheckSparseTensorRoundTrip(st);
+}
+
 TEST(TestRecordBatchStreamReader, MalformedInput) {
   const std::string empty_str = "";
   const std::string garbage_str = "12345678";
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 59a322a6433..fabe6f81f01 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -40,6 +40,7 @@
 #include "arrow/record_batch.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/type.h"
 #include "arrow/util/logging.h"
 #include "arrow/visitor_inline.h"
@@ -726,5 +727,105 @@ Status ReadTensor(const Message& message, std::shared_ptr<Tensor>* out) {
   return Status::OK();
 }
 
+namespace {
+
+Status ReadSparseCOOIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim, int64_t length, io::RandomAccessFile* file,
+                          std::shared_ptr<SparseIndex>* out) {
+  auto* sparse_index = sparse_tensor->sparseIndex_as_SparseTensorIndexCOO();
+  auto* indices_buffer = sparse_index->indicesBuffer();
+  std::shared_ptr<Buffer> indices_data;
+  RETURN_NOT_OK(file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data));
+  std::vector<int64_t> shape({length, ndim});
+  const int64_t elsize = sizeof(int64_t);
+  std::vector<int64_t> strides({elsize, elsize * length});
+  *out = std::make_shared<SparseCOOIndex>(
+      std::make_shared<SparseCOOIndex::CoordsTensor>(indices_data, shape, strides));
+  return Status::OK();
+}
+
+Status ReadSparseCSRIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim, int64_t length, io::RandomAccessFile* file,
+                          std::shared_ptr<SparseIndex>* out) {
+  auto* sparse_index = sparse_tensor->sparseIndex_as_SparseMatrixIndexCSR();
+
+  auto* indptr_buffer = sparse_index->indptrBuffer();
+  std::shared_ptr<Buffer> indptr_data;
+  RETURN_NOT_OK(file->ReadAt(indptr_buffer->offset(), indptr_buffer->length(), &indptr_data));
+
+  auto* indices_buffer = sparse_index->indicesBuffer();
+  std::shared_ptr<Buffer> indices_data;
+  RETURN_NOT_OK(file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data));
+
+  std::vector<int64_t> indptr_shape({ndim+1});
+  std::vector<int64_t> indices_shape({length});
+  *out = std::make_shared<SparseCSRIndex>(
+      std::make_shared<SparseCSRIndex::IndexTensor>(indptr_data, indptr_shape),
+      std::make_shared<SparseCSRIndex::IndexTensor>(indices_data, indices_shape));
+  return Status::OK();
+}
+
+Status MakeSparseTensorWithSparseCOOIndex(const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape, const std::vector<std::string>& dim_names, const std::shared_ptr<SparseCOOIndex>& sparse_index, int64_t length, const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
+  auto* sparse_tensor = new SparseTensor<SparseCOOIndex>(sparse_index, type, data, shape, dim_names);
+  *out = std::shared_ptr<SparseTensorBase>(sparse_tensor);
+  return Status::OK();
+}
+
+Status MakeSparseTensorWithSparseCSRIndex(const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape, const std::vector<std::string>& dim_names, const std::shared_ptr<SparseCSRIndex>& sparse_index, int64_t length, const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
+  auto* sparse_tensor = new SparseTensor<SparseCSRIndex>(sparse_index, type, data, shape, dim_names);
+  *out = std::shared_ptr<SparseTensorBase>(sparse_tensor);
+  return Status::OK();
+}
+
+}  // namespace
+
+Status ReadSparseTensor(const Buffer& metadata,
+                        io::RandomAccessFile* file,
+                        std::shared_ptr<SparseTensorBase>* out) {
+  std::shared_ptr<DataType> type;
+  std::vector<int64_t> shape;
+  std::vector<std::string> dim_names;
+  int64_t length;
+  SparseTensorFormat::type sparse_tensor_format_id;
+
+  RETURN_NOT_OK(internal::GetSparseTensorMetadata(metadata, &type, &shape, &dim_names,
+                                                  &length, &sparse_tensor_format_id));
+
+  auto message = flatbuf::GetMessage(metadata.data());
+  auto sparse_tensor = reinterpret_cast<const flatbuf::SparseTensor*>(message->header());
+  const flatbuf::Buffer* buffer = sparse_tensor->data();
+  DCHECK(BitUtil::IsMultipleOf8(buffer->offset()))
+      << "Buffer of sparse index data "
+      << "did not start on 8-byte aligned offset: " << buffer->offset();
+
+  std::shared_ptr<Buffer> data;
+  RETURN_NOT_OK(file->ReadAt(buffer->offset(), buffer->length(), &data));
+
+  std::shared_ptr<SparseIndex> sparse_index;
+  switch (sparse_tensor_format_id) {
+    case SparseTensorFormat::COO:
+      RETURN_NOT_OK(ReadSparseCOOIndex(sparse_tensor, shape.size(), length, file, &sparse_index));
+      return MakeSparseTensorWithSparseCOOIndex(type, shape, dim_names, std::dynamic_pointer_cast<SparseCOOIndex>(sparse_index), length, data, out);
+
+    case SparseTensorFormat::CSR:
+      RETURN_NOT_OK(ReadSparseCSRIndex(sparse_tensor, shape.size(), length, file, &sparse_index));
+      return MakeSparseTensorWithSparseCSRIndex(type, shape, dim_names, std::dynamic_pointer_cast<SparseCSRIndex>(sparse_index), length, data, out);
+
+    default:
+      return Status::Invalid("Unsupported sparse index format");
+  }
+}
+
+Status ReadSparseTensor(const Message& message, std::shared_ptr<SparseTensorBase>* out) {
+  io::BufferReader buffer_reader(message.body());
+  return ReadSparseTensor(*message.metadata(), &buffer_reader, out);
+}
+
+Status ReadSparseTensor(io::InputStream* file, std::shared_ptr<SparseTensorBase>* out) {
+  std::unique_ptr<Message> message;
+  RETURN_NOT_OK(ReadContiguousPayload(file, &message));
+  DCHECK_EQ(message->type(), Message::SPARSE_TENSOR);
+  io::BufferReader buffer_reader(message->body());
+  return ReadSparseTensor(*message->metadata(), &buffer_reader, out);
+}
+
 }  // namespace ipc
 }  // namespace arrow
diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index 942664d6f22..ca15d9c5357 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -33,6 +33,7 @@ class Buffer;
 class Schema;
 class Status;
 class Tensor;
+class SparseTensorBase;
 
 namespace io {
 
@@ -235,6 +236,30 @@ Status ReadTensor(io::InputStream* file, std::shared_ptr<Tensor>* out);
 ARROW_EXPORT
 Status ReadTensor(const Message& message, std::shared_ptr<Tensor>* out);
 
+/// \brief Read arrow::SparseTensor as encapsulated IPC message in file
+///
+/// \param[in] file an InputStream pointed at the start of the message
+/// \param[out] out the read sparse tensor
+/// \return Status
+ARROW_EXPORT
+Status ReadSparseTensor(io::InputStream* file, std::shared_ptr<SparseTensorBase>* out);
+
+/// \brief EXPERIMENTAL: Read arrow::SparseTensor from message metadata and a file
+/// \param[in] metadata a Buffer containing the SparseTensor message metadata
+/// \param[in] file the file from which the index and data buffers are read
+/// \param[out] out the read sparse tensor
+ARROW_EXPORT
+Status ReadSparseTensor(const Buffer& metadata, io::RandomAccessFile* file,
+                        std::shared_ptr<SparseTensorBase>* out);
+
+/// \brief EXPERIMENTAL: Read arrow::SparseTensor from IPC message
+///
+/// \param[in] message a Message containing the tensor metadata and body
+/// \param[out] out the read sparse tensor
+/// \return Status
+ARROW_EXPORT
+Status ReadSparseTensor(const Message& message, std::shared_ptr<SparseTensorBase>* out);
+
 }  // namespace ipc
 }  // namespace arrow
 
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index 6ce72e070e7..b02962a2272 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -21,6 +21,7 @@
 #include <cstdint>
 #include <cstring>
 #include <limits>
+#include <sstream>
 #include <vector>
 
 #include "arrow/array.h"
@@ -36,6 +37,7 @@
 #include "arrow/status.h"
 #include "arrow/table.h"
 #include "arrow/tensor.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/type.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/checked_cast.h"
@@ -671,6 +673,105 @@ Status GetTensorMessage(const Tensor& tensor, MemoryPool* pool,
   return Status::OK();
 }
 
+namespace internal {
+
+class SparseTensorSerializer {
+ public:
+  SparseTensorSerializer(int64_t buffer_start_offset, IpcPayload* out)
+      : out_(out),
+        buffer_start_offset_(buffer_start_offset) {}
+
+  ~SparseTensorSerializer() = default;
+
+  Status VisitSparseIndex(const SparseIndex& sparse_index) {
+    switch (sparse_index.format_id()) {
+      case SparseTensorFormat::COO:
+        RETURN_NOT_OK(VisitSparseCOOIndex(checked_cast<const SparseCOOIndex&>(sparse_index)));
+        break;
+
+      case SparseTensorFormat::CSR:
+        RETURN_NOT_OK(VisitSparseCSRIndex(checked_cast<const SparseCSRIndex&>(sparse_index)));
+        break;
+
+      default:
+        std::stringstream ss;
+        ss << "Unable to convert type: " << sparse_index.ToString() << std::endl;
+        return Status::NotImplemented(ss.str());
+    }
+
+    return Status::OK();
+  }
+
+  Status SerializeMetadata(const SparseTensorBase& sparse_tensor) {
+    return WriteSparseTensorMessage(sparse_tensor, out_->body_length, buffer_meta_,
+                                    &out_->metadata);
+  }
+
+  Status Assemble(const SparseTensorBase& sparse_tensor) {
+    if (buffer_meta_.size() > 0) {
+      buffer_meta_.clear();
+      out_->body_buffers.clear();
+    }
+
+    RETURN_NOT_OK(VisitSparseIndex(*sparse_tensor.sparse_index()));
+    out_->body_buffers.emplace_back(sparse_tensor.data());
+
+    int64_t offset = buffer_start_offset_;
+    buffer_meta_.reserve(out_->body_buffers.size());
+
+    for (size_t i = 0; i < out_->body_buffers.size(); ++i) {
+      const Buffer* buffer = out_->body_buffers[i].get();
+      int64_t size = buffer->size();
+      int64_t padding = BitUtil::RoundUpToMultipleOf8(size) - size;
+      buffer_meta_.push_back({offset, size + padding});
+      offset += size + padding;
+    }
+
+    out_->body_length = offset - buffer_start_offset_;
+    DCHECK(BitUtil::IsMultipleOf8(out_->body_length));
+
+    return SerializeMetadata(sparse_tensor);
+  }
+
+ private:
+  Status VisitSparseCOOIndex(const SparseCOOIndex& sparse_index) {
+    out_->body_buffers.emplace_back(sparse_index.indices()->data());
+    return Status::OK();
+  }
+
+  Status VisitSparseCSRIndex(const SparseCSRIndex& sparse_index) {
+    out_->body_buffers.emplace_back(sparse_index.indptr()->data());
+    out_->body_buffers.emplace_back(sparse_index.indices()->data());
+    return Status::OK();
+  }
+
+  IpcPayload* out_;
+
+  std::vector<internal::BufferMetadata> buffer_meta_;
+
+  int64_t buffer_start_offset_;
+};
+
+
+Status GetSparseTensorPayload(const SparseTensorBase& sparse_tensor, MemoryPool* pool,
+                              IpcPayload* out) {
+  SparseTensorSerializer writer(0, out);
+  return writer.Assemble(sparse_tensor);
+}
+
+}  // namespace internal
+
+Status WriteSparseTensor(const SparseTensorBase& sparse_tensor,
+                         io::OutputStream* dst, int32_t* metadata_length,
+                         int64_t* body_length, MemoryPool* pool) {
+  internal::IpcPayload payload;
+  internal::SparseTensorSerializer writer(0, &payload);
+  RETURN_NOT_OK(writer.Assemble(sparse_tensor));
+
+  *body_length = payload.body_length;
+  return internal::WriteIpcPayload(payload, dst, metadata_length);
+}
+
 Status WriteDictionary(int64_t dictionary_id, const std::shared_ptr<Array>& dictionary,
                        int64_t buffer_start_offset, io::OutputStream* dst,
                        int32_t* metadata_length, int64_t* body_length, MemoryPool* pool) {
diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h
index a1c711146ef..85b8663ea49 100644
--- a/cpp/src/arrow/ipc/writer.h
+++ b/cpp/src/arrow/ipc/writer.h
@@ -36,6 +36,7 @@ class Schema;
 class Status;
 class Table;
 class Tensor;
+class SparseTensorBase;
 
 namespace io {
 
@@ -269,6 +270,20 @@ ARROW_EXPORT
 Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,
                    int64_t* body_length);
 
+// \brief Write arrow::SparseTensor as a contiguous message. The metadata,
+// sparse index, and body are written assuming 64-byte alignment. It is the
+// user's responsibility to ensure that the OutputStream has been aligned
+// to a 64-byte multiple before writing the message.
+//
+// \param[in] sparse_tensor the SparseTensor to write
+// \param[in] dst the OutputStream to write to
+// \param[out] metadata_length the actual metadata length, including padding
+// \param[out] body_length the actual message body length
+ARROW_EXPORT
+Status WriteSparseTensor(const SparseTensorBase& sparse_tensor,
+                         io::OutputStream* dst, int32_t* metadata_length,
+                         int64_t* body_length, MemoryPool* pool);
+
 namespace internal {
 
 // These internal APIs may change without warning or deprecation

From b3a62ebfa0c683d4c2215ec4bf4f55948ccec00c Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Fri, 7 Dec 2018 15:20:17 +0900
Subject: [PATCH 19/40] Fix format

---
 cpp/src/arrow/ipc/metadata-internal.cc | 60 +++++++++++++++++---------
 cpp/src/arrow/ipc/metadata-internal.h  |  7 +--
 cpp/src/arrow/ipc/read-write-test.cc   | 16 ++++---
 cpp/src/arrow/ipc/reader.cc            | 54 +++++++++++++++--------
 cpp/src/arrow/ipc/writer.cc            | 18 ++++----
 cpp/src/arrow/ipc/writer.h             |  6 +--
 6 files changed, 102 insertions(+), 59 deletions(-)

diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index a644f434c24..1b315d20746 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -28,13 +28,13 @@
 #include "arrow/io/interfaces.h"
 #include "arrow/ipc/File_generated.h"  // IWYU pragma: keep
 #include "arrow/ipc/Message_generated.h"
-#include "arrow/ipc/Tensor_generated.h"  // IWYU pragma: keep
 #include "arrow/ipc/SparseTensor_generated.h"
+#include "arrow/ipc/Tensor_generated.h"  // IWYU pragma: keep
 #include "arrow/ipc/message.h"
 #include "arrow/ipc/util.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
-#include "arrow/sparse_tensor.h"
 #include "arrow/type.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
@@ -784,7 +784,10 @@ Status WriteTensorMessage(const Tensor& tensor, int64_t buffer_start_offset,
                         body_length, out);
 }
 
-Status MakeSparseTensorIndexCOO(FBB& fbb, const SparseCOOIndex& sparse_index, const std::vector<BufferMetadata>& buffers, flatbuf::SparseTensorIndex* fb_sparse_index_type, Offset* fb_sparse_index, size_t* num_buffers) {
+Status MakeSparseTensorIndexCOO(FBB& fbb, const SparseCOOIndex& sparse_index,
+                                const std::vector<BufferMetadata>& buffers,
+                                flatbuf::SparseTensorIndex* fb_sparse_index_type,
+                                Offset* fb_sparse_index, size_t* num_buffers) {
   *fb_sparse_index_type = flatbuf::SparseTensorIndex_SparseTensorIndexCOO;
   const BufferMetadata& indices_metadata = buffers[0];
   flatbuf::Buffer indices(indices_metadata.offset, indices_metadata.length);
@@ -793,7 +796,10 @@ Status MakeSparseTensorIndexCOO(FBB& fbb, const SparseCOOIndex& sparse_index, co
   return Status::OK();
 }
 
-Status MakeSparseMatrixIndexCSR(FBB& fbb, const SparseCSRIndex& sparse_index, const std::vector<BufferMetadata>& buffers, flatbuf::SparseTensorIndex* fb_sparse_index_type, Offset* fb_sparse_index, size_t* num_buffers) {
+Status MakeSparseMatrixIndexCSR(FBB& fbb, const SparseCSRIndex& sparse_index,
+                                const std::vector<BufferMetadata>& buffers,
+                                flatbuf::SparseTensorIndex* fb_sparse_index_type,
+                                Offset* fb_sparse_index, size_t* num_buffers) {
   *fb_sparse_index_type = flatbuf::SparseTensorIndex_SparseMatrixIndexCSR;
   const BufferMetadata& indptr_metadata = buffers[0];
   const BufferMetadata& indices_metadata = buffers[1];
@@ -804,31 +810,40 @@ Status MakeSparseMatrixIndexCSR(FBB& fbb, const SparseCSRIndex& sparse_index, co
   return Status::OK();
 }
 
-Status MakeSparseTensorIndex(FBB& fbb, const SparseIndex& sparse_index, const std::vector<BufferMetadata>& buffers, flatbuf::SparseTensorIndex* fb_sparse_index_type, Offset* fb_sparse_index, size_t* num_buffers) {
+Status MakeSparseTensorIndex(FBB& fbb, const SparseIndex& sparse_index,
+                             const std::vector<BufferMetadata>& buffers,
+                             flatbuf::SparseTensorIndex* fb_sparse_index_type,
+                             Offset* fb_sparse_index, size_t* num_buffers) {
   switch (sparse_index.format_id()) {
     case SparseTensorFormat::COO:
-      RETURN_NOT_OK(MakeSparseTensorIndexCOO(fbb, checked_cast<const SparseCOOIndex&>(sparse_index), buffers, fb_sparse_index_type, fb_sparse_index, num_buffers));
+      RETURN_NOT_OK(MakeSparseTensorIndexCOO(
+          fbb, checked_cast<const SparseCOOIndex&>(sparse_index), buffers,
+          fb_sparse_index_type, fb_sparse_index, num_buffers));
       break;
 
     case SparseTensorFormat::CSR:
-      RETURN_NOT_OK(MakeSparseMatrixIndexCSR(fbb, checked_cast<const SparseCSRIndex&>(sparse_index), buffers, fb_sparse_index_type, fb_sparse_index, num_buffers));
+      RETURN_NOT_OK(MakeSparseMatrixIndexCSR(
+          fbb, checked_cast<const SparseCSRIndex&>(sparse_index), buffers,
+          fb_sparse_index_type, fb_sparse_index, num_buffers));
       break;
 
     default:
       std::stringstream ss;
-      ss << "Unsupporoted sparse tensor format:: " << sparse_index.ToString() << std::endl;
+      ss << "Unsupporoted sparse tensor format:: " << sparse_index.ToString()
+         << std::endl;
       return Status::NotImplemented(ss.str());
   }
 
   return Status::OK();
 }
 
-Status MakeSparseTensor(FBB& fbb, const SparseTensorBase& sparse_tensor, int64_t body_length,
-                        const std::vector<BufferMetadata>& buffers,
+Status MakeSparseTensor(FBB& fbb, const SparseTensorBase& sparse_tensor,
+                        int64_t body_length, const std::vector<BufferMetadata>& buffers,
                         SparseTensorOffset* offset) {
   flatbuf::Type fb_type_type;
   Offset fb_type;
-  RETURN_NOT_OK(TensorTypeToFlatbuffer(fbb, *sparse_tensor.type(), &fb_type_type, &fb_type));
+  RETURN_NOT_OK(
+      TensorTypeToFlatbuffer(fbb, *sparse_tensor.type(), &fb_type_type, &fb_type));
 
   using TensorDimOffset = flatbuffers::Offset<flatbuf::TensorDim>;
   std::vector<TensorDimOffset> dims;
@@ -842,25 +857,31 @@ Status MakeSparseTensor(FBB& fbb, const SparseTensorBase& sparse_tensor, int64_t
   flatbuf::SparseTensorIndex fb_sparse_index_type;
   Offset fb_sparse_index;
   size_t num_index_buffers = 0;
-  RETURN_NOT_OK(MakeSparseTensorIndex(fbb, *sparse_tensor.sparse_index(), buffers, &fb_sparse_index_type, &fb_sparse_index, &num_index_buffers));
+  RETURN_NOT_OK(MakeSparseTensorIndex(fbb, *sparse_tensor.sparse_index(), buffers,
+                                      &fb_sparse_index_type, &fb_sparse_index,
+                                      &num_index_buffers));
 
   const BufferMetadata& data_metadata = buffers[num_index_buffers];
   flatbuf::Buffer data(data_metadata.offset, data_metadata.length);
 
   int64_t length = sparse_tensor.length();
 
-  *offset = flatbuf::CreateSparseTensor(fbb, fb_type_type, fb_type, fb_shape, length, fb_sparse_index_type, fb_sparse_index, &data);
+  *offset = flatbuf::CreateSparseTensor(fbb, fb_type_type, fb_type, fb_shape, length,
+                                        fb_sparse_index_type, fb_sparse_index, &data);
 
   return Status::OK();
 }
 
-Status WriteSparseTensorMessage(const SparseTensorBase& sparse_tensor, int64_t body_length,
+Status WriteSparseTensorMessage(const SparseTensorBase& sparse_tensor,
+                                int64_t body_length,
                                 const std::vector<BufferMetadata>& buffers,
                                 std::shared_ptr<Buffer>* out) {
   FBB fbb;
   SparseTensorOffset fb_sparse_tensor;
-  RETURN_NOT_OK(MakeSparseTensor(fbb, sparse_tensor, body_length, buffers, &fb_sparse_tensor));
-  return WriteFBMessage(fbb, flatbuf::MessageHeader_SparseTensor, fb_sparse_tensor.Union(), body_length, out);
+  RETURN_NOT_OK(
+      MakeSparseTensor(fbb, sparse_tensor, body_length, buffers, &fb_sparse_tensor));
+  return WriteFBMessage(fbb, flatbuf::MessageHeader_SparseTensor,
+                        fb_sparse_tensor.Union(), body_length, out);
 }
 
 Status WriteDictionaryMessage(int64_t id, int64_t length, int64_t body_length,
@@ -1015,11 +1036,10 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
   return TypeFromFlatbuffer(tensor->type_type(), tensor->type(), {}, type);
 }
 
-Status GetSparseTensorMetadata(const Buffer& metadata,
-                               std::shared_ptr<DataType>* type, std::vector<int64_t>* shape,
+Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
+                               std::vector<int64_t>* shape,
                                std::vector<std::string>* dim_names, int64_t* length,
-                               SparseTensorFormat::type* sparse_tensor_format_id)
-{
+                               SparseTensorFormat::type* sparse_tensor_format_id) {
   auto message = flatbuf::GetMessage(metadata.data());
   if (message->header_type() != flatbuf::MessageHeader_SparseTensor) {
     DCHECK_EQ(message->header_type(), flatbuf::MessageHeader_SparseTensor);
diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h
index 621a9c66bf5..420fca8a0bd 100644
--- a/cpp/src/arrow/ipc/metadata-internal.h
+++ b/cpp/src/arrow/ipc/metadata-internal.h
@@ -105,8 +105,8 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
                          std::vector<int64_t>* shape, std::vector<int64_t>* strides,
                          std::vector<std::string>* dim_names);
 
-Status GetSparseTensorMetadata(const Buffer& metadata,
-                               std::shared_ptr<DataType>* type, std::vector<int64_t>* shape,
+Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
+                               std::vector<int64_t>* shape,
                                std::vector<std::string>* dim_names, int64_t* length,
                                SparseTensorFormat::type* sparse_tensor_format_id);
 
@@ -144,7 +144,8 @@ Status WriteRecordBatchMessage(const int64_t length, const int64_t body_length,
 Status WriteTensorMessage(const Tensor& tensor, const int64_t buffer_start_offset,
                           std::shared_ptr<Buffer>* out);
 
-Status WriteSparseTensorMessage(const SparseTensorBase& sparse_tensor, int64_t body_length,
+Status WriteSparseTensorMessage(const SparseTensorBase& sparse_tensor,
+                                int64_t body_length,
                                 const std::vector<BufferMetadata>& buffers,
                                 std::shared_ptr<Buffer>* out);
 
diff --git a/cpp/src/arrow/ipc/read-write-test.cc b/cpp/src/arrow/ipc/read-write-test.cc
index 14055013247..c69d139a37a 100644
--- a/cpp/src/arrow/ipc/read-write-test.cc
+++ b/cpp/src/arrow/ipc/read-write-test.cc
@@ -38,9 +38,9 @@
 #include "arrow/ipc/writer.h"
 #include "arrow/memory_pool.h"
 #include "arrow/record_batch.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
-#include "arrow/sparse_tensor.h"
 #include "arrow/test-util.h"
 #include "arrow/type.h"
 #include "arrow/util/bit-util.h"
@@ -859,7 +859,6 @@ class TestSparseTensorRoundTrip : public ::testing::Test, public IpcTestFixture
 template <>
 void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCOOIndex>(
     const SparseTensor<SparseCOOIndex>& tensor) {
-
   const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
   const int elem_size = type.bit_width() / 8;
 
@@ -868,7 +867,8 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCOOIndex>(
 
   ASSERT_OK(mmap_->Seek(0));
 
-  ASSERT_OK(WriteSparseTensor(tensor, mmap_.get(), &metadata_length, &body_length, default_memory_pool()));
+  ASSERT_OK(WriteSparseTensor(tensor, mmap_.get(), &metadata_length, &body_length,
+                              default_memory_pool()));
 
   const auto& sparse_index = checked_cast<const SparseCOOIndex&>(*tensor.sparse_index());
   const int64_t indices_length = elem_size * sparse_index.indices()->size();
@@ -881,7 +881,8 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCOOIndex>(
   std::shared_ptr<SparseTensorBase> result;
   ASSERT_OK(ReadSparseTensor(mmap_.get(), &result));
 
-  const auto& resulted_sparse_index = checked_cast<const SparseCOOIndex&>(*result->sparse_index());
+  const auto& resulted_sparse_index =
+      checked_cast<const SparseCOOIndex&>(*result->sparse_index());
   ASSERT_EQ(resulted_sparse_index.indices()->data()->size(), indices_length);
   ASSERT_EQ(result->data()->size(), data_length);
   // TODO ASSERT_TRUE(sparse_tensor.Equals(*result));
@@ -890,7 +891,6 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCOOIndex>(
 template <>
 void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCSRIndex>(
     const SparseTensor<SparseCSRIndex>& tensor) {
-
   const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
   const int elem_size = type.bit_width() / 8;
 
@@ -899,7 +899,8 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCSRIndex>(
 
   ASSERT_OK(mmap_->Seek(0));
 
-  ASSERT_OK(WriteSparseTensor(tensor, mmap_.get(), &metadata_length, &body_length, default_memory_pool()));
+  ASSERT_OK(WriteSparseTensor(tensor, mmap_.get(), &metadata_length, &body_length,
+                              default_memory_pool()));
 
   const auto& sparse_index = checked_cast<const SparseCSRIndex&>(*tensor.sparse_index());
   const int64_t indptr_length = elem_size * sparse_index.indptr()->size();
@@ -913,7 +914,8 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCSRIndex>(
   std::shared_ptr<SparseTensorBase> result;
   ASSERT_OK(ReadSparseTensor(mmap_.get(), &result));
 
-  const auto& resulted_sparse_index = checked_cast<const SparseCSRIndex&>(*result->sparse_index());
+  const auto& resulted_sparse_index =
+      checked_cast<const SparseCSRIndex&>(*result->sparse_index());
   ASSERT_EQ(resulted_sparse_index.indptr()->data()->size(), indptr_length);
   ASSERT_EQ(resulted_sparse_index.indices()->data()->size(), indices_length);
   ASSERT_EQ(result->data()->size(), data_length);
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index fabe6f81f01..62faaef0475 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -38,9 +38,9 @@
 #include "arrow/ipc/message.h"
 #include "arrow/ipc/metadata-internal.h"
 #include "arrow/record_batch.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
-#include "arrow/sparse_tensor.h"
 #include "arrow/type.h"
 #include "arrow/util/logging.h"
 #include "arrow/visitor_inline.h"
@@ -729,12 +729,14 @@ Status ReadTensor(const Message& message, std::shared_ptr<Tensor>* out) {
 
 namespace {
 
-Status ReadSparseCOOIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim, int64_t length, io::RandomAccessFile* file,
+Status ReadSparseCOOIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim,
+                          int64_t length, io::RandomAccessFile* file,
                           std::shared_ptr<SparseIndex>* out) {
   auto* sparse_index = sparse_tensor->sparseIndex_as_SparseTensorIndexCOO();
   auto* indices_buffer = sparse_index->indicesBuffer();
   std::shared_ptr<Buffer> indices_data;
-  RETURN_NOT_OK(file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data));
+  RETURN_NOT_OK(
+      file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data));
   std::vector<int64_t> shape({length, ndim});
   const int64_t elsize = sizeof(int64_t);
   std::vector<int64_t> strides({elsize, elsize * length});
@@ -743,19 +745,22 @@ Status ReadSparseCOOIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t nd
   return Status::OK();
 }
 
-Status ReadSparseCSRIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim, int64_t length, io::RandomAccessFile* file,
+Status ReadSparseCSRIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim,
+                          int64_t length, io::RandomAccessFile* file,
                           std::shared_ptr<SparseIndex>* out) {
   auto* sparse_index = sparse_tensor->sparseIndex_as_SparseMatrixIndexCSR();
 
   auto* indptr_buffer = sparse_index->indptrBuffer();
   std::shared_ptr<Buffer> indptr_data;
-  RETURN_NOT_OK(file->ReadAt(indptr_buffer->offset(), indptr_buffer->length(), &indptr_data));
+  RETURN_NOT_OK(
+      file->ReadAt(indptr_buffer->offset(), indptr_buffer->length(), &indptr_data));
 
   auto* indices_buffer = sparse_index->indicesBuffer();
   std::shared_ptr<Buffer> indices_data;
-  RETURN_NOT_OK(file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data));
+  RETURN_NOT_OK(
+      file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data));
 
-  std::vector<int64_t> indptr_shape({ndim+1});
+  std::vector<int64_t> indptr_shape({ndim + 1});
   std::vector<int64_t> indices_shape({length});
   *out = std::make_shared<SparseCSRIndex>(
       std::make_shared<SparseCSRIndex::IndexTensor>(indptr_data, indptr_shape),
@@ -763,22 +768,31 @@ Status ReadSparseCSRIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t nd
   return Status::OK();
 }
 
-Status MakeSparseTensorWithSparseCOOIndex(const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape, const std::vector<std::string>& dim_names, const std::shared_ptr<SparseCOOIndex>& sparse_index, int64_t length, const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
-  auto* sparse_tensor = new SparseTensor<SparseCOOIndex>(sparse_index, type, data, shape, dim_names);
+Status MakeSparseTensorWithSparseCOOIndex(
+    const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+    const std::vector<std::string>& dim_names,
+    const std::shared_ptr<SparseCOOIndex>& sparse_index, int64_t length,
+    const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
+  auto* sparse_tensor =
+      new SparseTensor<SparseCOOIndex>(sparse_index, type, data, shape, dim_names);
   *out = std::shared_ptr<SparseTensorBase>(sparse_tensor);
   return Status::OK();
 }
 
-Status MakeSparseTensorWithSparseCSRIndex(const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape, const std::vector<std::string>& dim_names, const std::shared_ptr<SparseCSRIndex>& sparse_index, int64_t length, const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
-  auto* sparse_tensor = new SparseTensor<SparseCSRIndex>(sparse_index, type, data, shape, dim_names);
+Status MakeSparseTensorWithSparseCSRIndex(
+    const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+    const std::vector<std::string>& dim_names,
+    const std::shared_ptr<SparseCSRIndex>& sparse_index, int64_t length,
+    const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
+  auto* sparse_tensor =
+      new SparseTensor<SparseCSRIndex>(sparse_index, type, data, shape, dim_names);
   *out = std::shared_ptr<SparseTensorBase>(sparse_tensor);
   return Status::OK();
 }
 
 }  // namespace
 
-Status ReadSparseTensor(const Buffer& metadata,
-                        io::RandomAccessFile* file,
+Status ReadSparseTensor(const Buffer& metadata, io::RandomAccessFile* file,
                         std::shared_ptr<SparseTensorBase>* out) {
   std::shared_ptr<DataType> type;
   std::vector<int64_t> shape;
@@ -802,12 +816,18 @@ Status ReadSparseTensor(const Buffer& metadata,
   std::shared_ptr<SparseIndex> sparse_index;
   switch (sparse_tensor_format_id) {
     case SparseTensorFormat::COO:
-      RETURN_NOT_OK(ReadSparseCOOIndex(sparse_tensor, shape.size(), length, file, &sparse_index));
-      return MakeSparseTensorWithSparseCOOIndex(type, shape, dim_names, std::dynamic_pointer_cast<SparseCOOIndex>(sparse_index), length, data, out);
+      RETURN_NOT_OK(
+          ReadSparseCOOIndex(sparse_tensor, shape.size(), length, file, &sparse_index));
+      return MakeSparseTensorWithSparseCOOIndex(
+          type, shape, dim_names, std::dynamic_pointer_cast<SparseCOOIndex>(sparse_index),
+          length, data, out);
 
     case SparseTensorFormat::CSR:
-      RETURN_NOT_OK(ReadSparseCSRIndex(sparse_tensor, shape.size(), length, file, &sparse_index));
-      return MakeSparseTensorWithSparseCSRIndex(type, shape, dim_names, std::dynamic_pointer_cast<SparseCSRIndex>(sparse_index), length, data, out);
+      RETURN_NOT_OK(
+          ReadSparseCSRIndex(sparse_tensor, shape.size(), length, file, &sparse_index));
+      return MakeSparseTensorWithSparseCSRIndex(
+          type, shape, dim_names, std::dynamic_pointer_cast<SparseCSRIndex>(sparse_index),
+          length, data, out);
 
     default:
       return Status::Invalid("Unsupported sparse index format");
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index b02962a2272..cd1d2773c0b 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -34,10 +34,10 @@
 #include "arrow/ipc/util.h"
 #include "arrow/memory_pool.h"
 #include "arrow/record_batch.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/table.h"
 #include "arrow/tensor.h"
-#include "arrow/sparse_tensor.h"
 #include "arrow/type.h"
 #include "arrow/util/bit-util.h"
 #include "arrow/util/checked_cast.h"
@@ -678,19 +678,20 @@ namespace internal {
 class SparseTensorSerializer {
  public:
   SparseTensorSerializer(int64_t buffer_start_offset, IpcPayload* out)
-      : out_(out),
-        buffer_start_offset_(buffer_start_offset) {}
+      : out_(out), buffer_start_offset_(buffer_start_offset) {}
 
   ~SparseTensorSerializer() = default;
 
   Status VisitSparseIndex(const SparseIndex& sparse_index) {
     switch (sparse_index.format_id()) {
       case SparseTensorFormat::COO:
-        RETURN_NOT_OK(VisitSparseCOOIndex(checked_cast<const SparseCOOIndex&>(sparse_index)));
+        RETURN_NOT_OK(
+            VisitSparseCOOIndex(checked_cast<const SparseCOOIndex&>(sparse_index)));
         break;
 
       case SparseTensorFormat::CSR:
-        RETURN_NOT_OK(VisitSparseCSRIndex(checked_cast<const SparseCSRIndex&>(sparse_index)));
+        RETURN_NOT_OK(
+            VisitSparseCSRIndex(checked_cast<const SparseCSRIndex&>(sparse_index)));
         break;
 
       default:
@@ -752,7 +753,6 @@ class SparseTensorSerializer {
   int64_t buffer_start_offset_;
 };
 
-
 Status GetSparseTensorPayload(const SparseTensorBase& sparse_tensor, MemoryPool* pool,
                               IpcPayload* out) {
   SparseTensorSerializer writer(0, out);
@@ -761,9 +761,9 @@ Status GetSparseTensorPayload(const SparseTensorBase& sparse_tensor, MemoryPool*
 
 }  // namespace internal
 
-Status WriteSparseTensor(const SparseTensorBase& sparse_tensor,
-                         io::OutputStream* dst, int32_t* metadata_length,
-                         int64_t* body_length, MemoryPool* pool) {
+Status WriteSparseTensor(const SparseTensorBase& sparse_tensor, io::OutputStream* dst,
+                         int32_t* metadata_length, int64_t* body_length,
+                         MemoryPool* pool) {
   internal::IpcPayload payload;
   internal::SparseTensorSerializer writer(0, &payload);
   RETURN_NOT_OK(writer.Assemble(sparse_tensor));
diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h
index 85b8663ea49..996f0ff7302 100644
--- a/cpp/src/arrow/ipc/writer.h
+++ b/cpp/src/arrow/ipc/writer.h
@@ -280,9 +280,9 @@ Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadat
 // \param[out] metadata_length the actual metadata length, including padding
 // \param[out] body_length the actual message body length
 ARROW_EXPORT
-Status WriteSparseTensor(const SparseTensorBase& sparse_tensor,
-                         io::OutputStream* dst, int32_t* metadata_length,
-                         int64_t* body_length, MemoryPool* pool);
+Status WriteSparseTensor(const SparseTensorBase& sparse_tensor, io::OutputStream* dst,
+                         int32_t* metadata_length, int64_t* body_length,
+                         MemoryPool* pool);
 
 namespace internal {
 

From d6a8c380591d0e1573015d7ef6897d539b2549d0 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Fri, 7 Dec 2018 16:31:45 +0900
Subject: [PATCH 20/40] Unify Tensor.fbs and SparseTensor.fbs

---
 cpp/src/arrow/ipc/CMakeLists.txt       |   1 -
 cpp/src/arrow/ipc/metadata-internal.cc |   1 -
 format/Message.fbs                     |   1 -
 format/SparseTensor.fbs                | 114 -------------------------
 format/Tensor.fbs                      |  96 +++++++++++++++++++++
 5 files changed, 96 insertions(+), 117 deletions(-)
 delete mode 100644 format/SparseTensor.fbs

diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt
index 07e333b6edd..422e72e2eda 100644
--- a/cpp/src/arrow/ipc/CMakeLists.txt
+++ b/cpp/src/arrow/ipc/CMakeLists.txt
@@ -64,7 +64,6 @@ set(FBS_SRC
   ${CMAKE_SOURCE_DIR}/../format/File.fbs
   ${CMAKE_SOURCE_DIR}/../format/Schema.fbs
   ${CMAKE_SOURCE_DIR}/../format/Tensor.fbs
-  ${CMAKE_SOURCE_DIR}/../format/SparseTensor.fbs
   ${CMAKE_CURRENT_SOURCE_DIR}/feather.fbs)
 
 foreach(FIL ${FBS_SRC})
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 1b315d20746..7d301b1d4e0 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -28,7 +28,6 @@
 #include "arrow/io/interfaces.h"
 #include "arrow/ipc/File_generated.h"  // IWYU pragma: keep
 #include "arrow/ipc/Message_generated.h"
-#include "arrow/ipc/SparseTensor_generated.h"
 #include "arrow/ipc/Tensor_generated.h"  // IWYU pragma: keep
 #include "arrow/ipc/message.h"
 #include "arrow/ipc/util.h"
diff --git a/format/Message.fbs b/format/Message.fbs
index d7dcd7647fd..e14fdca8f15 100644
--- a/format/Message.fbs
+++ b/format/Message.fbs
@@ -17,7 +17,6 @@
 
 include "Schema.fbs";
 include "Tensor.fbs";
-include "SparseTensor.fbs";
 
 namespace org.apache.arrow.flatbuf;
 
diff --git a/format/SparseTensor.fbs b/format/SparseTensor.fbs
deleted file mode 100644
index 95666979bb5..00000000000
--- a/format/SparseTensor.fbs
+++ /dev/null
@@ -1,114 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-/// EXPERIMENTAL: Metadata for n-dimensional sparse tensors, that contains
-/// only non-zero values.  Arrow implementations in general are not required
-/// to implement this type
-
-include "Tensor.fbs";
-
-namespace org.apache.arrow.flatbuf;
-
-/// Coodinate format.
-table SparseTensorIndexCOO {
-  /// COO's index list are represented as a NxM matrix,
-  /// where N is the number of non-zero values,
-  /// and M is the number of dimensions of a sparse tensor.
-  /// indicesBuffer stores the location and size of this index matrix.
-  /// The type of index value is long, so the stride for the index matrix is unnecessary.
-  ///
-  /// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
-  ///
-  ///   X[0, 1, 2, 0] := 1
-  ///   X[1, 1, 2, 3] := 2
-  ///   X[0, 2, 1, 0] := 3
-  ///   X[0, 1, 3, 0] := 4
-  ///   X[0, 1, 2, 1] := 5
-  ///   X[1, 2, 0, 4] := 6
-  ///
-  /// In COO format, the index matrix of X is the following 4x10 matrix:
-  ///
-  ///   [[0, 0, 0, 0, 1, 1],
-  ///    [1, 1, 1, 2, 1, 2],
-  ///    [2, 2, 3, 1, 2, 0],
-  ///    [0, 1, 0, 0, 3, 4]]
-  ///
-  /// Note that the indices are sorted in lexcographical order.
-  indicesBuffer: Buffer;
-}
-
-/// Compressed Sparse Row format, that is matrix-specific.
-table SparseMatrixIndexCSR {
-  /// indptrBuffer stores the location and size of indptr array that
-  /// represents the range of the rows.
-  /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
-  /// The length of this array is 1 + (the number of rows), and the type
-  /// of index value is long.
-  ///
-  /// For example, let X be the following 6x4 matrix:
-  ///
-  ///   X := [[0, 1, 2, 0], 
-  ///         [0, 0, 3, 0],
-  ///         [0, 4, 0, 5],
-  ///         [0, 0, 0, 0],
-  ///         [6, 0, 7, 8],
-  ///         [0, 9, 0, 0]].
-  ///
-  /// The array of non-zero values in X is:
-  ///
-  ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-  ///
-  /// And the indptr of X is:
-  ///
-  ///   indptr(X) = [0, 2, 3, 5, 5, 8, 10].
-  indptrBuffer: Buffer;
-
-  /// indicesBuffer stores the location and size of the array that
-  /// contains the column indices of the corresponding non-zero values.
-  /// The type of index value is long.
-  ///
-  /// For example, the indices of the above X is:
-  ///
-  ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
-  indicesBuffer: Buffer;
-}
-
-union SparseTensorIndex {
-  SparseTensorIndexCOO,
-  SparseMatrixIndexCSR
-}
-
-table SparseTensor {
-  /// The type of data contained in a value cell.
-  /// Currently only fixed-width value types are supported,
-  /// no strings or nested types.
-  type: Type;
-
-  /// The dimensions of the tensor, optionally named.
-  shape: [TensorDim];
-
-  /// The number of non-zero values in a sparse tensor.
-  length: long;
-
-  /// Sparse tensor index
-  sparseIndex: SparseTensorIndex;
-
-  /// The location and size of the tensor's data
-  data: Buffer;
-}
-
-root_type SparseTensor;
diff --git a/format/Tensor.fbs b/format/Tensor.fbs
index 18b614c3bde..74000f90259 100644
--- a/format/Tensor.fbs
+++ b/format/Tensor.fbs
@@ -23,6 +23,9 @@ include "Schema.fbs";
 
 namespace org.apache.arrow.flatbuf;
 
+/// ----------------------------------------------------------------------
+/// Data structures for dense tensors
+
 /// Shape data for a single axis in a tensor
 table TensorDim {
   /// Length of dimension
@@ -48,3 +51,96 @@ table Tensor {
 }
 
 root_type Tensor;
+
+/// ----------------------------------------------------------------------
+/// Data structures for sparse tensors
+
+/// Coordinate format of sparse tensor index.
+table SparseTensorIndexCOO {
+  /// COO's index list is represented as an NxM matrix,
+  /// where N is the number of non-zero values,
+  /// and M is the number of dimensions of a sparse tensor.
+  /// indicesBuffer stores the location and size of this index matrix.
+  /// The type of index value is long, so the stride for the index matrix is unnecessary.
+  ///
+  /// For example, let X be a 2x3x4x5 tensor, and it has the following 6 non-zero values:
+  ///
+  ///   X[0, 1, 2, 0] := 1
+  ///   X[1, 1, 2, 3] := 2
+  ///   X[0, 2, 1, 0] := 3
+  ///   X[0, 1, 3, 0] := 4
+  ///   X[0, 1, 2, 1] := 5
+  ///   X[1, 2, 0, 4] := 6
+  ///
+  /// In COO format, the index matrix of X is the following 4x6 matrix:
+  ///
+  ///   [[0, 0, 0, 0, 1, 1],
+  ///    [1, 1, 1, 2, 1, 2],
+  ///    [2, 2, 3, 1, 2, 0],
+  ///    [0, 1, 0, 0, 3, 4]]
+  ///
+  /// Note that the indices are sorted in lexicographical order.
+  indicesBuffer: Buffer;
+}
+
+/// Compressed Sparse Row format, that is matrix-specific.
+table SparseMatrixIndexCSR {
+  /// indptrBuffer stores the location and size of indptr array that
+  /// represents the range of the rows.
+  /// The i-th row spans from indptr[i] to indptr[i+1] in the data.
+  /// The length of this array is 1 + (the number of rows), and the type
+  /// of index value is long.
+  ///
+  /// For example, let X be the following 6x4 matrix:
+  ///
+  ///   X := [[0, 1, 2, 0],
+  ///         [0, 0, 3, 0],
+  ///         [0, 4, 0, 5],
+  ///         [0, 0, 0, 0],
+  ///         [6, 0, 7, 8],
+  ///         [0, 9, 0, 0]].
+  ///
+  /// The array of non-zero values in X is:
+  ///
+  ///   values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+  ///
+  /// And the indptr of X is:
+  ///
+  ///   indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+  indptrBuffer: Buffer;
+
+  /// indicesBuffer stores the location and size of the array that
+  /// contains the column indices of the corresponding non-zero values.
+  /// The type of index value is long.
+  ///
+  /// For example, the indices of the above X are:
+  ///
+  ///   indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+  indicesBuffer: Buffer;
+}
+
+union SparseTensorIndex {
+  SparseTensorIndexCOO,
+  SparseMatrixIndexCSR
+}
+
+table SparseTensor {
+  /// The type of data contained in a value cell.
+  /// Currently only fixed-width value types are supported,
+  /// no strings or nested types.
+  type: Type;
+
+  /// The dimensions of the tensor, optionally named.
+  shape: [TensorDim];
+
+  /// The number of non-zero values in a sparse tensor.
+  length: long;
+
+  /// Sparse tensor index: a SparseTensorIndexCOO or a SparseMatrixIndexCSR.
+  sparseIndex: SparseTensorIndex;
+
+  /// The location and size of the tensor's data.
+  data: Buffer;
+}
+
+root_type SparseTensor;

From 3b1db7d32644e5057728b39e2bff3bac0a8c13e8 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Fri, 7 Dec 2018 22:41:02 +0900
Subject: [PATCH 21/40] Add SparseTensorBase::Equals

---
 cpp/src/arrow/compare.cc             | 90 ++++++++++++++++++++++++++++
 cpp/src/arrow/compare.h              |  4 ++
 cpp/src/arrow/ipc/read-write-test.cc |  4 +-
 cpp/src/arrow/sparse_tensor.cc       |  5 ++
 cpp/src/arrow/sparse_tensor.h        | 10 ++++
 5 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index efc8ad82faf..86bf87b41b2 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -30,6 +30,7 @@
 
 #include "arrow/array.h"
 #include "arrow/buffer.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 #include "arrow/tensor.h"
 #include "arrow/type.h"
@@ -782,6 +783,95 @@ bool TensorEquals(const Tensor& left, const Tensor& right) {
   return are_equal;
 }
 
+namespace {
+
+template <typename LeftSparseIndexType, typename RightSparseIndexType>
+struct SparseTensorEqualsImpl {
+  static bool compare(const SparseTensor<LeftSparseIndexType>& left,
+                      const SparseTensor<RightSparseIndexType>& right) {
+    // Different index formats never compare equal. TODO(mrkn): support this?
+    return false;
+  }
+};
+
+// Same-format comparison: the sparse index first, then the raw value bytes.
+template <typename SparseIndexType>
+struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
+  static bool compare(const SparseTensor<SparseIndexType>& left,
+                      const SparseTensor<SparseIndexType>& right) {
+    // SparseTensorEquals() rejects mismatched type/shape/length before dispatching here.
+    DCHECK(left.type()->id() == right.type()->id());
+    DCHECK(left.shape() == right.shape());
+    DCHECK(left.length() == right.length());
+
+    const auto& left_index = checked_cast<const SparseIndexType&>(*left.sparse_index());
+    const auto& right_index = checked_cast<const SparseIndexType&>(*right.sparse_index());
+    if (!left_index.Equals(right_index)) {
+      return false;
+    }
+
+    const auto& size_meta = dynamic_cast<const FixedWidthType&>(*left.type());
+    const int byte_width = size_meta.bit_width() / CHAR_BIT;
+    DCHECK_GT(byte_width, 0);
+
+    // memcmp() returns 0 on a match, so compare against 0 to report equality.
+    const size_t nbytes = static_cast<size_t>(byte_width * left.length());
+    return memcmp(left.data()->data(), right.data()->data(), nbytes) == 0;
+  }
+};
+
+// Resolves the concrete index format of `right`, downcasts it, and forwards both
+// tensors to the SparseTensorEqualsImpl instantiation for that format pair.
+template <typename LeftSparseIndexType>
+inline bool SparseTensorEqualsImplDispatch(
+    const SparseTensor<LeftSparseIndexType>& left, const SparseTensorBase& right) {
+  switch (right.sparse_tensor_format_id()) {
+    case SparseTensorFormat::COO: {
+      using Comparer = SparseTensorEqualsImpl<LeftSparseIndexType, SparseCOOIndex>;
+      return Comparer::compare(
+          left, checked_cast<const SparseTensor<SparseCOOIndex>&>(right));
+    }
+    case SparseTensorFormat::CSR: {
+      using Comparer = SparseTensorEqualsImpl<LeftSparseIndexType, SparseCSRIndex>;
+      return Comparer::compare(
+          left, checked_cast<const SparseTensor<SparseCSRIndex>&>(right));
+    }
+    default:
+      return false;
+  }
+}
+
+}  // namespace
+
+// Cheap rejections first, then dispatch on the index format of `left`.
+bool SparseTensorEquals(const SparseTensorBase& left, const SparseTensorBase& right) {
+  if (&left == &right) {
+    return true;
+  } else if (left.type()->id() != right.type()->id()) {
+    return false;
+  } else if (left.shape() != right.shape()) {
+    // Must precede the empty shortcut, or an empty `left` matches any `right` shape.
+    return false;
+  } else if (left.size() == 0) {
+    return true;
+  } else if (left.length() != right.length()) {
+    return false;
+  }
+
+  switch (left.sparse_tensor_format_id()) {
+    case SparseTensorFormat::COO: {
+      const auto& left_coo = checked_cast<const SparseTensor<SparseCOOIndex>&>(left);
+      return SparseTensorEqualsImplDispatch(left_coo, right);
+    }
+    case SparseTensorFormat::CSR: {
+      const auto& left_csr = checked_cast<const SparseTensor<SparseCSRIndex>&>(left);
+      return SparseTensorEqualsImplDispatch(left_csr, right);
+    }
+    default:
+      return false;
+  }
+}
+
 bool TypeEquals(const DataType& left, const DataType& right) {
   bool are_equal;
   // The arrays are the same object
diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h
index 21e2fdc24f1..bc4877dcbd4 100644
--- a/cpp/src/arrow/compare.h
+++ b/cpp/src/arrow/compare.h
@@ -29,12 +29,16 @@ namespace arrow {
 class Array;
 class DataType;
 class Tensor;
+class SparseTensorBase;
 
 /// Returns true if the arrays are exactly equal
 bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right);
 
 bool ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right);
 
+bool ARROW_EXPORT SparseTensorEquals(const SparseTensorBase& left,
+                                     const SparseTensorBase& right);
+
 /// Returns true if the arrays are approximately equal. For non-floating point
 /// types, this is equivalent to ArrayEquals(left, right)
 bool ARROW_EXPORT ArrayApproxEquals(const Array& left, const Array& right);
diff --git a/cpp/src/arrow/ipc/read-write-test.cc b/cpp/src/arrow/ipc/read-write-test.cc
index c69d139a37a..79a84a8497f 100644
--- a/cpp/src/arrow/ipc/read-write-test.cc
+++ b/cpp/src/arrow/ipc/read-write-test.cc
@@ -885,7 +885,7 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCOOIndex>(
       checked_cast<const SparseCOOIndex&>(*result->sparse_index());
   ASSERT_EQ(resulted_sparse_index.indices()->data()->size(), indices_length);
   ASSERT_EQ(result->data()->size(), data_length);
-  // TODO ASSERT_TRUE(sparse_tensor.Equals(*result));
+  ASSERT_TRUE(result->Equals(*result));
 }
 
 template <>
@@ -919,7 +919,7 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCSRIndex>(
   ASSERT_EQ(resulted_sparse_index.indptr()->data()->size(), indptr_length);
   ASSERT_EQ(resulted_sparse_index.indices()->data()->size(), indices_length);
   ASSERT_EQ(result->data()->size(), data_length);
-  // TODO ASSERT_TRUE(sparse_tensor.Equals(*result));
+  ASSERT_TRUE(result->Equals(*result));
 }
 
 TEST_F(TestSparseTensorRoundTrip, WithSparseCOOIndex) {
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index a8d96c3bd62..844be035fd7 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -21,6 +21,7 @@
 #include <memory>
 #include <numeric>
 
+#include "arrow/compare.h"
 #include "arrow/util/logging.h"
 
 namespace arrow {
@@ -336,6 +337,10 @@ int64_t SparseTensorBase::size() const {
   return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies<int64_t>());
 }
 
+bool SparseTensorBase::Equals(const SparseTensorBase& other) const {
+  return SparseTensorEquals(*this, other);
+}
+
 // ----------------------------------------------------------------------
 // SparseTensor
 
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index cc55e734ec9..e25703a47e0 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -70,6 +70,10 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> {
 
   std::string ToString() const override;
 
+  bool Equals(const SparseCOOIndex& other) const {
+    return indices()->Equals(*other.indices());
+  }
+
  protected:
   std::shared_ptr<CoordsTensor> coords_;
 };
@@ -92,6 +96,10 @@ class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> {
 
   std::string ToString() const override;
 
+  bool Equals(const SparseCSRIndex& other) const {
+    return indptr()->Equals(*other.indptr()) && indices()->Equals(*other.indices());
+  }
+
  protected:
   std::shared_ptr<IndexTensor> indptr_;
   std::shared_ptr<IndexTensor> indices_;
@@ -129,6 +137,8 @@ class ARROW_EXPORT SparseTensorBase {
   /// Total number of non-zero cells in the sparse tensor
   virtual int64_t length() const = 0;
 
+  bool Equals(const SparseTensorBase& other) const;
+
  protected:
   // Constructor with all attributes
   SparseTensorBase(const std::shared_ptr<DataType>& type,

From 9e457acd392ded97402afa97fcea85c807cb9886 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Sun, 9 Dec 2018 10:08:44 +0900
Subject: [PATCH 22/40] Remove needless virtual specifiers

---
 cpp/src/arrow/sparse_tensor.h | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index e25703a47e0..95da9cc1a35 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -112,7 +112,7 @@ class ARROW_EXPORT SparseTensorBase {
  public:
   virtual ~SparseTensorBase() = default;
 
-  virtual SparseTensorFormat::type sparse_tensor_format_id() const = 0;
+  SparseTensorFormat::type sparse_tensor_format_id() const { return sparse_index_->format_id(); }
 
   std::shared_ptr<DataType> type() const { return type_; }
   std::shared_ptr<Buffer> data() const { return data_; }
@@ -135,7 +135,7 @@ class ARROW_EXPORT SparseTensorBase {
   bool is_mutable() const { return data_->is_mutable(); }
 
   /// Total number of non-zero cells in the sparse tensor
-  virtual int64_t length() const = 0;
+  int64_t length() const { return sparse_index_ ? sparse_index_->length() : 0; }
 
   bool Equals(const SparseTensorBase& other) const;
 
@@ -181,13 +181,6 @@ class ARROW_EXPORT SparseTensor : public SparseTensorBase {
   // Constructor with a dense tensor
   explicit SparseTensor(const Tensor& tensor);
 
-  SparseTensorFormat::type sparse_tensor_format_id() const {
-    return SparseIndexType::format_id;
-  }
-
-  /// Total number of non-zero cells in the sparse tensor
-  int64_t length() const { return sparse_index_ ? sparse_index_->length() : 0; }
-
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(SparseTensor);
 };

From 401ae8023a74058e26ae795b6c490e8e02156f64 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Sun, 9 Dec 2018 10:22:00 +0900
Subject: [PATCH 23/40] Fix SparseCSRIndex::ToString and add tests

---
 cpp/src/arrow/sparse_tensor-test.cc | 28 ++++++++++++++--------------
 cpp/src/arrow/sparse_tensor.cc      |  2 +-
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index 64778ca33b6..86323e68b5a 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -92,9 +92,10 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
     ASSERT_EQ(i + 11, ptr[i + 6]);
   }
 
-  std::shared_ptr<SparseCOOIndex> si =
-      std::dynamic_pointer_cast<SparseCOOIndex>(st1.sparse_index());
-  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si->indices();
+  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
+  ASSERT_EQ(std::string("SparseCOOIndex"), si.ToString());
+
+  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
 
@@ -154,9 +155,8 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
     ASSERT_EQ(i + 11, ptr[i + 6]);
   }
 
-  std::shared_ptr<SparseCOOIndex> si =
-      std::dynamic_pointer_cast<SparseCOOIndex>(st1.sparse_index());
-  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si->indices();
+  const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
+  std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
   ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
   ASSERT_TRUE(sidx->is_column_major());
 
@@ -219,24 +219,24 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
     ASSERT_EQ(i + 11, ptr[i + 6]);
   }
 
-  std::shared_ptr<SparseCSRIndex> si =
-      std::dynamic_pointer_cast<SparseCSRIndex>(st1.sparse_index());
+  const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index());
 
-  ASSERT_EQ(1, si->indptr()->ndim());
-  ASSERT_EQ(1, si->indices()->ndim());
+  ASSERT_EQ(std::string("SparseCSRIndex"), si.ToString());
+  ASSERT_EQ(1, si.indptr()->ndim());
+  ASSERT_EQ(1, si.indices()->ndim());
 
   const int64_t* indptr_begin =
-      reinterpret_cast<const int64_t*>(si->indptr()->raw_data());
+      reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
   std::vector<int64_t> indptr_values(indptr_begin,
-                                     indptr_begin + si->indptr()->shape()[0]);
+                                     indptr_begin + si.indptr()->shape()[0]);
 
   ASSERT_EQ(7, indptr_values.size());
   ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
 
   const int64_t* indices_begin =
-      reinterpret_cast<const int64_t*>(si->indices()->raw_data());
+      reinterpret_cast<const int64_t*>(si.indices()->raw_data());
   std::vector<int64_t> indices_values(indices_begin,
-                                      indices_begin + si->indices()->shape()[0]);
+                                      indices_begin + si.indices()->shape()[0]);
 
   ASSERT_EQ(12, indices_values.size());
   ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 844be035fd7..964a6c6f29e 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -304,7 +304,7 @@ SparseCSRIndex::SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr,
   DCHECK_EQ(1, indices_->ndim());
 }
 
-std::string SparseCSRIndex::ToString() const { return std::string("SparseCOOIndex"); }
+std::string SparseCSRIndex::ToString() const { return std::string("SparseCSRIndex"); }
 
 // ----------------------------------------------------------------------
 // SparseTensorBase

From 99b1d1d4d9b38d87557b6135763c8362542fd69b Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Sun, 9 Dec 2018 08:47:48 +0900
Subject: [PATCH 24/40] Add missing ARROW_EXPORT specifiers

---
 cpp/src/arrow/sparse_tensor.cc | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 964a6c6f29e..dadb29e156c 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -421,17 +421,17 @@ SparseTensor<SparseIndexType>::SparseTensor(const Tensor& tensor)
 
 #define INSTANTIATE_SPARSE_TENSOR(IndexType)                                           \
   template class ARROW_TEMPLATE_EXPORT SparseTensor<IndexType>;                        \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt8Type>&);     \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt16Type>&);    \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt32Type>&);    \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt64Type>&);    \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int8Type>&);      \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int16Type>&);     \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int32Type>&);     \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int64Type>&);     \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<HalfFloatType>&); \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<FloatType>&);     \
-  template SparseTensor<IndexType>::SparseTensor(const NumericTensor<DoubleType>&)
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt8Type>&);     \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt16Type>&);    \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt32Type>&);    \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt64Type>&);    \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int8Type>&);      \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int16Type>&);     \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int32Type>&);     \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int64Type>&);     \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<HalfFloatType>&); \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<FloatType>&);     \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<DoubleType>&)
 
 INSTANTIATE_SPARSE_TENSOR(SparseCOOIndex);
 INSTANTIATE_SPARSE_TENSOR(SparseCSRIndex);

From 43d8eea44860da31530b964638626c89128cd208 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Sun, 9 Dec 2018 10:29:27 +0900
Subject: [PATCH 25/40] Fix coding style

---
 cpp/src/arrow/sparse_tensor-test.cc |  3 +--
 cpp/src/arrow/sparse_tensor.cc      | 37 +++++++++++++++++++----------
 cpp/src/arrow/sparse_tensor.h       |  4 +++-
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index 86323e68b5a..9c648b85caa 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -225,8 +225,7 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   ASSERT_EQ(1, si.indptr()->ndim());
   ASSERT_EQ(1, si.indices()->ndim());
 
-  const int64_t* indptr_begin =
-      reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
+  const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
   std::vector<int64_t> indptr_values(indptr_begin,
                                      indptr_begin + si.indptr()->shape()[0]);
 
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index dadb29e156c..896750c9ca5 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -419,19 +419,30 @@ SparseTensor<SparseIndexType>::SparseTensor(const Tensor& tensor)
 // ----------------------------------------------------------------------
 // Instantiate templates
 
-#define INSTANTIATE_SPARSE_TENSOR(IndexType)                                           \
-  template class ARROW_TEMPLATE_EXPORT SparseTensor<IndexType>;                        \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt8Type>&);     \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt16Type>&);    \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt32Type>&);    \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<UInt64Type>&);    \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int8Type>&);      \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int16Type>&);     \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int32Type>&);     \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<Int64Type>&);     \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<HalfFloatType>&); \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<FloatType>&);     \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(const NumericTensor<DoubleType>&)
+#define INSTANTIATE_SPARSE_TENSOR(IndexType)                    \
+  template class ARROW_TEMPLATE_EXPORT SparseTensor<IndexType>; \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<UInt8Type>&);                         \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<UInt16Type>&);                        \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<UInt32Type>&);                        \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<UInt64Type>&);                        \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<Int8Type>&);                          \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<Int16Type>&);                         \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<Int32Type>&);                         \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<Int64Type>&);                         \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<HalfFloatType>&);                     \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<FloatType>&);                         \
+  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+      const NumericTensor<DoubleType>&)
 
 INSTANTIATE_SPARSE_TENSOR(SparseCOOIndex);
 INSTANTIATE_SPARSE_TENSOR(SparseCSRIndex);
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index 95da9cc1a35..db3d5cc14d7 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -112,7 +112,9 @@ class ARROW_EXPORT SparseTensorBase {
  public:
   virtual ~SparseTensorBase() = default;
 
-  SparseTensorFormat::type sparse_tensor_format_id() const { return sparse_index_->format_id(); }
+  SparseTensorFormat::type sparse_tensor_format_id() const {
+    return sparse_index_->format_id();
+  }
 
   std::shared_ptr<DataType> type() const { return type_; }
   std::shared_ptr<Buffer> data() const { return data_; }

From 357860d8c7c491d6b35ff0e19c4284525870da43 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Thu, 13 Dec 2018 09:48:25 +0900
Subject: [PATCH 26/40] Fix typo in comments

---
 format/Tensor.fbs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/format/Tensor.fbs b/format/Tensor.fbs
index 74000f90259..936e4c56919 100644
--- a/format/Tensor.fbs
+++ b/format/Tensor.fbs
@@ -72,7 +72,7 @@ table SparseTensorIndexCOO {
   ///   X[0, 1, 2, 1] := 5
   ///   X[1, 2, 0, 4] := 6
   ///
-  /// In COO format, the index matrix of X is the following 4x10 matrix:
+  /// In COO format, the index matrix of X is the following 4x6 matrix:
   ///
   ///   [[0, 0, 0, 0, 1, 1],
   ///    [1, 1, 1, 2, 1, 2],

From 7e814de365b92ee0900c569fdfe26f1983b6b014 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Thu, 13 Dec 2018 09:51:03 +0900
Subject: [PATCH 27/40] Put EXPERIMENTAL mark in comments

---
 cpp/src/arrow/ipc/reader.h | 2 +-
 cpp/src/arrow/ipc/writer.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index ca15d9c5357..6668418e7ca 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -236,7 +236,7 @@ Status ReadTensor(io::InputStream* file, std::shared_ptr<Tensor>* out);
 ARROW_EXPORT
 Status ReadTensor(const Message& message, std::shared_ptr<Tensor>* out);
 
-/// \brief Read arrow::SparseTensor as encapsulated IPC message in file
+/// \brief EXPERIMENTAL: Read arrow::SparseTensor as encapsulated IPC message in file
 ///
 /// \param[in] file an InputStream pointed at the start of the message
 /// \param[out] out the read sparse tensor
diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h
index 996f0ff7302..1a39dfc49d5 100644
--- a/cpp/src/arrow/ipc/writer.h
+++ b/cpp/src/arrow/ipc/writer.h
@@ -270,7 +270,7 @@ ARROW_EXPORT
 Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,
                    int64_t* body_length);
 
-// \brief Write arrow::SparseTensor as a contiguous mesasge. The metadata,
+// \brief EXPERIMENTAL: Write arrow::SparseTensor as a contiguous message. The metadata,
 // sparse index, and body are written assuming 64-byte alignment. It is the
 // user's responsibility to ensure that the OutputStream has been aligned
 // to a 64-byte multiple before writing the message.

From f78230344de9567e245422548b2105da81b56ade Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Thu, 13 Dec 2018 10:01:26 +0900
Subject: [PATCH 28/40] Return Status::IOError instead of DCHECK if message
 header type is not matched

---
 cpp/src/arrow/ipc/metadata-internal.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 7d301b1d4e0..6964aa36730 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -1041,7 +1041,7 @@ Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>
                                SparseTensorFormat::type* sparse_tensor_format_id) {
   auto message = flatbuf::GetMessage(metadata.data());
   if (message->header_type() != flatbuf::MessageHeader_SparseTensor) {
-    DCHECK_EQ(message->header_type(), flatbuf::MessageHeader_SparseTensor);
+    return Status::IOError("Header of flatbuffer-encoded Message is not SparseTensor.");
   }
   if (message->header() == nullptr) {
     return Status::IOError("Header-pointer of flatbuffer-encoded Message is null.");

From ff3ea71c5a0b816251d8c786ac680f5c015368fb Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Thu, 13 Dec 2018 10:23:41 +0900
Subject: [PATCH 29/40] Rename length to non_zero_length in SparseTensor

---
 cpp/src/arrow/compare.cc               |  6 +++---
 cpp/src/arrow/ipc/metadata-internal.cc |  8 ++++----
 cpp/src/arrow/ipc/read-write-test.cc   |  4 ++--
 cpp/src/arrow/ipc/reader.cc            | 26 +++++++++++++-------------
 cpp/src/arrow/sparse_tensor-test.cc    | 10 +++++-----
 cpp/src/arrow/sparse_tensor.h          | 14 +++++++-------
 format/Tensor.fbs                      |  2 +-
 7 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 86bf87b41b2..1d7a2cc6bb2 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -800,7 +800,7 @@ struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
                       const SparseTensor<SparseIndexType>& right) {
     DCHECK(left.type()->id() == right.type()->id());
     DCHECK(left.shape() == right.shape());
-    DCHECK(left.length() == right.length());
+    DCHECK(left.non_zero_length() == right.non_zero_length());
 
     const auto& left_index = checked_cast<const SparseIndexType&>(*left.sparse_index());
     const auto& right_index = checked_cast<const SparseIndexType&>(*right.sparse_index());
@@ -816,7 +816,7 @@ struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
     const uint8_t* left_data = left.data()->data();
     const uint8_t* right_data = right.data()->data();
 
-    return memcmp(left_data, right_data, static_cast<size_t>(byte_width * left.length()));
+    return memcmp(left_data, right_data, static_cast<size_t>(byte_width * left.non_zero_length()));
   }
 };
 
@@ -852,7 +852,7 @@ bool SparseTensorEquals(const SparseTensorBase& left, const SparseTensorBase& ri
     return true;
   } else if (left.shape() != right.shape()) {
     return false;
-  } else if (left.length() != right.length()) {
+  } else if (left.non_zero_length() != right.non_zero_length()) {
     return false;
   }
 
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 6964aa36730..0c968a359b3 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -863,9 +863,9 @@ Status MakeSparseTensor(FBB& fbb, const SparseTensorBase& sparse_tensor,
   const BufferMetadata& data_metadata = buffers[num_index_buffers];
   flatbuf::Buffer data(data_metadata.offset, data_metadata.length);
 
-  int64_t length = sparse_tensor.length();
+  const int64_t non_zero_length = sparse_tensor.non_zero_length();
 
-  *offset = flatbuf::CreateSparseTensor(fbb, fb_type_type, fb_type, fb_shape, length,
+  *offset = flatbuf::CreateSparseTensor(fbb, fb_type_type, fb_type, fb_shape, non_zero_length,
                                         fb_sparse_index_type, fb_sparse_index, &data);
 
   return Status::OK();
@@ -1037,7 +1037,7 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
 
 Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
                                std::vector<int64_t>* shape,
-                               std::vector<std::string>* dim_names, int64_t* length,
+                               std::vector<std::string>* dim_names, int64_t* non_zero_length,
                                SparseTensorFormat::type* sparse_tensor_format_id) {
   auto message = flatbuf::GetMessage(metadata.data());
   if (message->header_type() != flatbuf::MessageHeader_SparseTensor) {
@@ -1062,7 +1062,7 @@ Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>
     }
   }
 
-  *length = sparse_tensor->length();
+  *non_zero_length = sparse_tensor->non_zero_length();
 
   switch (sparse_tensor->sparseIndex_type()) {
     case flatbuf::SparseTensorIndex_SparseTensorIndexCOO:
diff --git a/cpp/src/arrow/ipc/read-write-test.cc b/cpp/src/arrow/ipc/read-write-test.cc
index 79a84a8497f..820708dcaa3 100644
--- a/cpp/src/arrow/ipc/read-write-test.cc
+++ b/cpp/src/arrow/ipc/read-write-test.cc
@@ -872,7 +872,7 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCOOIndex>(
 
   const auto& sparse_index = checked_cast<const SparseCOOIndex&>(*tensor.sparse_index());
   const int64_t indices_length = elem_size * sparse_index.indices()->size();
-  const int64_t data_length = elem_size * tensor.length();
+  const int64_t data_length = elem_size * tensor.non_zero_length();
   const int64_t expected_body_length = indices_length + data_length;
   ASSERT_EQ(expected_body_length, body_length);
 
@@ -905,7 +905,7 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCSRIndex>(
   const auto& sparse_index = checked_cast<const SparseCSRIndex&>(*tensor.sparse_index());
   const int64_t indptr_length = elem_size * sparse_index.indptr()->size();
   const int64_t indices_length = elem_size * sparse_index.indices()->size();
-  const int64_t data_length = elem_size * tensor.length();
+  const int64_t data_length = elem_size * tensor.non_zero_length();
   const int64_t expected_body_length = indptr_length + indices_length + data_length;
   ASSERT_EQ(expected_body_length, body_length);
 
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 62faaef0475..920977882b6 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -730,23 +730,23 @@ Status ReadTensor(const Message& message, std::shared_ptr<Tensor>* out) {
 namespace {
 
 Status ReadSparseCOOIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim,
-                          int64_t length, io::RandomAccessFile* file,
+                          int64_t non_zero_length, io::RandomAccessFile* file,
                           std::shared_ptr<SparseIndex>* out) {
   auto* sparse_index = sparse_tensor->sparseIndex_as_SparseTensorIndexCOO();
   auto* indices_buffer = sparse_index->indicesBuffer();
   std::shared_ptr<Buffer> indices_data;
   RETURN_NOT_OK(
       file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data));
-  std::vector<int64_t> shape({length, ndim});
+  std::vector<int64_t> shape({non_zero_length, ndim});
   const int64_t elsize = sizeof(int64_t);
-  std::vector<int64_t> strides({elsize, elsize * length});
+  std::vector<int64_t> strides({elsize, elsize * non_zero_length});
   *out = std::make_shared<SparseCOOIndex>(
       std::make_shared<SparseCOOIndex::CoordsTensor>(indices_data, shape, strides));
   return Status::OK();
 }
 
 Status ReadSparseCSRIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t ndim,
-                          int64_t length, io::RandomAccessFile* file,
+                          int64_t non_zero_length, io::RandomAccessFile* file,
                           std::shared_ptr<SparseIndex>* out) {
   auto* sparse_index = sparse_tensor->sparseIndex_as_SparseMatrixIndexCSR();
 
@@ -761,7 +761,7 @@ Status ReadSparseCSRIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t nd
       file->ReadAt(indices_buffer->offset(), indices_buffer->length(), &indices_data));
 
   std::vector<int64_t> indptr_shape({ndim + 1});
-  std::vector<int64_t> indices_shape({length});
+  std::vector<int64_t> indices_shape({non_zero_length});
   *out = std::make_shared<SparseCSRIndex>(
       std::make_shared<SparseCSRIndex::IndexTensor>(indptr_data, indptr_shape),
       std::make_shared<SparseCSRIndex::IndexTensor>(indices_data, indices_shape));
@@ -771,7 +771,7 @@ Status ReadSparseCSRIndex(const flatbuf::SparseTensor* sparse_tensor, int64_t nd
 Status MakeSparseTensorWithSparseCOOIndex(
     const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
     const std::vector<std::string>& dim_names,
-    const std::shared_ptr<SparseCOOIndex>& sparse_index, int64_t length,
+    const std::shared_ptr<SparseCOOIndex>& sparse_index, int64_t non_zero_length,
     const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
   auto* sparse_tensor =
       new SparseTensor<SparseCOOIndex>(sparse_index, type, data, shape, dim_names);
@@ -782,7 +782,7 @@ Status MakeSparseTensorWithSparseCOOIndex(
 Status MakeSparseTensorWithSparseCSRIndex(
     const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
     const std::vector<std::string>& dim_names,
-    const std::shared_ptr<SparseCSRIndex>& sparse_index, int64_t length,
+    const std::shared_ptr<SparseCSRIndex>& sparse_index, int64_t non_zero_length,
     const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
   auto* sparse_tensor =
       new SparseTensor<SparseCSRIndex>(sparse_index, type, data, shape, dim_names);
@@ -797,11 +797,11 @@ Status ReadSparseTensor(const Buffer& metadata, io::RandomAccessFile* file,
   std::shared_ptr<DataType> type;
   std::vector<int64_t> shape;
   std::vector<std::string> dim_names;
-  int64_t length;
+  int64_t non_zero_length;
   SparseTensorFormat::type sparse_tensor_format_id;
 
   RETURN_NOT_OK(internal::GetSparseTensorMetadata(metadata, &type, &shape, &dim_names,
-                                                  &length, &sparse_tensor_format_id));
+                                                  &non_zero_length, &sparse_tensor_format_id));
 
   auto message = flatbuf::GetMessage(metadata.data());
   auto sparse_tensor = reinterpret_cast<const flatbuf::SparseTensor*>(message->header());
@@ -817,17 +817,17 @@ Status ReadSparseTensor(const Buffer& metadata, io::RandomAccessFile* file,
   switch (sparse_tensor_format_id) {
     case SparseTensorFormat::COO:
       RETURN_NOT_OK(
-          ReadSparseCOOIndex(sparse_tensor, shape.size(), length, file, &sparse_index));
+          ReadSparseCOOIndex(sparse_tensor, shape.size(), non_zero_length, file, &sparse_index));
       return MakeSparseTensorWithSparseCOOIndex(
           type, shape, dim_names, std::dynamic_pointer_cast<SparseCOOIndex>(sparse_index),
-          length, data, out);
+          non_zero_length, data, out);
 
     case SparseTensorFormat::CSR:
       RETURN_NOT_OK(
-          ReadSparseCSRIndex(sparse_tensor, shape.size(), length, file, &sparse_index));
+          ReadSparseCSRIndex(sparse_tensor, shape.size(), non_zero_length, file, &sparse_index));
       return MakeSparseTensorWithSparseCSRIndex(
           type, shape, dim_names, std::dynamic_pointer_cast<SparseCSRIndex>(sparse_index),
-          length, data, out);
+          non_zero_length, data, out);
 
     default:
       return Status::Invalid("Unsupported sparse index format");
diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index 9c648b85caa..21f9991fe82 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -45,8 +45,8 @@ TEST(TestSparseCOOTensor, CreationEmptyTensor) {
   std::vector<std::string> dim_names = {"foo", "bar", "baz"};
   SparseTensor<SparseCOOIndex> st2(int64(), shape, dim_names);
 
-  ASSERT_EQ(0, st1.length());
-  ASSERT_EQ(0, st2.length());
+  ASSERT_EQ(0, st1.non_zero_length());
+  ASSERT_EQ(0, st2.non_zero_length());
 
   ASSERT_EQ(24, st1.size());
   ASSERT_EQ(24, st2.size());
@@ -73,7 +73,7 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
 
   CheckSparseIndexFormatType(SparseTensorFormat::COO, st1);
 
-  ASSERT_EQ(12, st1.length());
+  ASSERT_EQ(12, st1.non_zero_length());
   ASSERT_TRUE(st1.is_mutable());
 
   ASSERT_EQ("foo", st2.dim_name(0));
@@ -136,7 +136,7 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
   SparseTensor<SparseCOOIndex> st1(tensor1);
   SparseTensor<SparseCOOIndex> st2(tensor2);
 
-  ASSERT_EQ(12, st1.length());
+  ASSERT_EQ(12, st1.non_zero_length());
   ASSERT_TRUE(st1.is_mutable());
 
   ASSERT_EQ("foo", st2.dim_name(0));
@@ -200,7 +200,7 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
 
   CheckSparseIndexFormatType(SparseTensorFormat::CSR, st1);
 
-  ASSERT_EQ(12, st1.length());
+  ASSERT_EQ(12, st1.non_zero_length());
   ASSERT_TRUE(st1.is_mutable());
 
   ASSERT_EQ("foo", st2.dim_name(0));
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index db3d5cc14d7..359ee4600ba 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -32,26 +32,26 @@ namespace arrow {
 
 class ARROW_EXPORT SparseIndex {
  public:
-  explicit SparseIndex(SparseTensorFormat::type format_id, int64_t length)
-      : format_id_(format_id), length_(length) {}
+  explicit SparseIndex(SparseTensorFormat::type format_id, int64_t non_zero_length)
+      : format_id_(format_id), non_zero_length_(non_zero_length) {}
 
   virtual ~SparseIndex() = default;
 
   SparseTensorFormat::type format_id() const { return format_id_; }
-  int64_t length() const { return length_; }
+  int64_t non_zero_length() const { return non_zero_length_; }
 
   virtual std::string ToString() const = 0;
 
  protected:
   SparseTensorFormat::type format_id_;
-  int64_t length_;
+  int64_t non_zero_length_;
 };
 
 template <typename SparseIndexType>
 class SparseIndexBase : public SparseIndex {
  public:
-  explicit SparseIndexBase(int64_t length)
-      : SparseIndex(SparseIndexType::format_id, length) {}
+  explicit SparseIndexBase(int64_t non_zero_length)
+      : SparseIndex(SparseIndexType::format_id, non_zero_length) {}
 };
 
 // ----------------------------------------------------------------------
@@ -137,7 +137,7 @@ class ARROW_EXPORT SparseTensorBase {
   bool is_mutable() const { return data_->is_mutable(); }
 
   /// Total number of non-zero cells in the sparse tensor
-  int64_t length() const { return sparse_index_ ? sparse_index_->length() : 0; }
+  int64_t non_zero_length() const { return sparse_index_ ? sparse_index_->non_zero_length() : 0; }
 
   bool Equals(const SparseTensorBase& other) const;
 
diff --git a/format/Tensor.fbs b/format/Tensor.fbs
index 936e4c56919..c1df6b2d1a9 100644
--- a/format/Tensor.fbs
+++ b/format/Tensor.fbs
@@ -134,7 +134,7 @@ table SparseTensor {
   shape: [TensorDim];
 
   /// The number of non-zero values in a sparse tensor.
-  length: long;
+  non_zero_length: long;
 
   /// Sparse tensor index
   sparseIndex: SparseTensorIndex;

From 6f291581edc5dbca306708a652ea76657155dfcd Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Mon, 7 Jan 2019 23:53:44 +0900
Subject: [PATCH 30/40] Mark APIs for sparse tensor as EXPERIMENTAL

---
 cpp/src/arrow/compare.h               | 1 +
 cpp/src/arrow/ipc/metadata-internal.h | 1 +
 cpp/src/arrow/ipc/writer.h            | 2 +-
 cpp/src/arrow/sparse_tensor.h         | 6 ++++++
 cpp/src/arrow/sparse_tensor_format.h  | 1 +
 format/Tensor.fbs                     | 2 +-
 6 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h
index bc4877dcbd4..6067b7929ab 100644
--- a/cpp/src/arrow/compare.h
+++ b/cpp/src/arrow/compare.h
@@ -36,6 +36,7 @@ bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right);
 
 bool ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right);
 
+/// EXPERIMENTAL: Returns true if the given sparse tensors are exactly equal
 bool ARROW_EXPORT SparseTensorEquals(const SparseTensorBase& left,
                                      const SparseTensorBase& right);
 
diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h
index 420fca8a0bd..bff3dd02231 100644
--- a/cpp/src/arrow/ipc/metadata-internal.h
+++ b/cpp/src/arrow/ipc/metadata-internal.h
@@ -105,6 +105,7 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
                          std::vector<int64_t>* shape, std::vector<int64_t>* strides,
                          std::vector<std::string>* dim_names);
 
+// EXPERIMENTAL: Extracting metadata of a sparse tensor from the message
 Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
                                std::vector<int64_t>* shape,
                                std::vector<std::string>* dim_names, int64_t* length,
diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h
index 1a39dfc49d5..56c1672065c 100644
--- a/cpp/src/arrow/ipc/writer.h
+++ b/cpp/src/arrow/ipc/writer.h
@@ -270,7 +270,7 @@ ARROW_EXPORT
 Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,
                    int64_t* body_length);
 
-// \brief EXPERIMETNAL: Write arrow::SparseTensor as a contiguous mesasge. The metadata,
+// \brief EXPERIMENTAL: Write arrow::SparseTensor as a contiguous message. The metadata,
 // sparse index, and body are written assuming 64-byte alignment. It is the
 // user's responsibility to ensure that the OutputStream has been aligned
 // to a 64-byte multiple before writing the message.
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index 359ee4600ba..bfa00e487c9 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -30,6 +30,8 @@ namespace arrow {
 // ----------------------------------------------------------------------
 // SparseIndex class
 
+/// \brief EXPERIMENTAL: The base class for representing index of non-zero
+/// values in sparse tensor
 class ARROW_EXPORT SparseIndex {
  public:
   explicit SparseIndex(SparseTensorFormat::type format_id, int64_t non_zero_length)
@@ -57,6 +59,7 @@ class SparseIndexBase : public SparseIndex {
 // ----------------------------------------------------------------------
 // SparseCOOIndex class
 
+/// \brief EXPERIMENTAL: The index data for COO sparse tensor
 class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> {
  public:
   using CoordsTensor = NumericTensor<Int64Type>;
@@ -81,6 +84,7 @@ class ARROW_EXPORT SparseCOOIndex : public SparseIndexBase<SparseCOOIndex> {
 // ----------------------------------------------------------------------
 // SparseCSRIndex class
 
+/// \brief EXPERIMENTAL: The index data for CSR sparse matrix
 class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> {
  public:
   using IndexTensor = NumericTensor<Int64Type>;
@@ -108,6 +112,7 @@ class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> {
 // ----------------------------------------------------------------------
 // SparseTensorBase class
 
+/// \brief EXPERIMENTAL: The base class of sparse tensor container
 class ARROW_EXPORT SparseTensorBase {
  public:
   virtual ~SparseTensorBase() = default;
@@ -160,6 +165,7 @@ class ARROW_EXPORT SparseTensorBase {
 // ----------------------------------------------------------------------
 // SparseTensor class
 
+/// \brief EXPERIMENTAL: Concrete sparse tensor classes with sparse index type
 template <typename SparseIndexType>
 class ARROW_EXPORT SparseTensor : public SparseTensorBase {
  public:
diff --git a/cpp/src/arrow/sparse_tensor_format.h b/cpp/src/arrow/sparse_tensor_format.h
index 24c1a190f50..813378ff0e9 100644
--- a/cpp/src/arrow/sparse_tensor_format.h
+++ b/cpp/src/arrow/sparse_tensor_format.h
@@ -18,6 +18,7 @@
 #ifndef ARROW_SPARSE_TENSOR_FORMAT_H
 #define ARROW_SPARSE_TENSOR_FORMAT_H
 
+/// \brief EXPERIMENTAL: Sparse tensor format enumeration
 struct SparseTensorFormat {
   enum type { COO, CSR };
 };
diff --git a/format/Tensor.fbs b/format/Tensor.fbs
index c1df6b2d1a9..e77b353a0f3 100644
--- a/format/Tensor.fbs
+++ b/format/Tensor.fbs
@@ -53,7 +53,7 @@ table Tensor {
 root_type Tensor;
 
 /// ----------------------------------------------------------------------
-/// Data structures for sparse tensors
+/// EXPERIMENTAL: Data structures for sparse tensors
 
 /// Coodinate format of sparse tensor index.
 table SparseTensorIndexCOO {

From 6ef6ad065a21e5b3c01b3fb62508b10de716519d Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Tue, 8 Jan 2019 00:56:00 +0900
Subject: [PATCH 31/40] Apply code formatter

---
 cpp/src/arrow/compare.cc               |  3 ++-
 cpp/src/arrow/ipc/metadata-internal.cc |  8 +++++---
 cpp/src/arrow/ipc/reader.cc            | 12 ++++++------
 cpp/src/arrow/sparse_tensor.h          |  4 +++-
 4 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 1d7a2cc6bb2..4230c24676f 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -816,7 +816,8 @@ struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
     const uint8_t* left_data = left.data()->data();
     const uint8_t* right_data = right.data()->data();
 
-    return memcmp(left_data, right_data, static_cast<size_t>(byte_width * left.non_zero_length()));
+    return memcmp(left_data, right_data,
+                  static_cast<size_t>(byte_width * left.non_zero_length()));
   }
 };
 
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 0c968a359b3..aafa2e3fe82 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -865,8 +865,9 @@ Status MakeSparseTensor(FBB& fbb, const SparseTensorBase& sparse_tensor,
 
   const int64_t non_zero_length = sparse_tensor.non_zero_length();
 
-  *offset = flatbuf::CreateSparseTensor(fbb, fb_type_type, fb_type, fb_shape, non_zero_length,
-                                        fb_sparse_index_type, fb_sparse_index, &data);
+  *offset =
+      flatbuf::CreateSparseTensor(fbb, fb_type_type, fb_type, fb_shape, non_zero_length,
+                                  fb_sparse_index_type, fb_sparse_index, &data);
 
   return Status::OK();
 }
@@ -1037,7 +1038,8 @@ Status GetTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type
 
 Status GetSparseTensorMetadata(const Buffer& metadata, std::shared_ptr<DataType>* type,
                                std::vector<int64_t>* shape,
-                               std::vector<std::string>* dim_names, int64_t* non_zero_length,
+                               std::vector<std::string>* dim_names,
+                               int64_t* non_zero_length,
                                SparseTensorFormat::type* sparse_tensor_format_id) {
   auto message = flatbuf::GetMessage(metadata.data());
   if (message->header_type() != flatbuf::MessageHeader_SparseTensor) {
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 920977882b6..d930ab380c4 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -800,8 +800,8 @@ Status ReadSparseTensor(const Buffer& metadata, io::RandomAccessFile* file,
   int64_t non_zero_length;
   SparseTensorFormat::type sparse_tensor_format_id;
 
-  RETURN_NOT_OK(internal::GetSparseTensorMetadata(metadata, &type, &shape, &dim_names,
-                                                  &non_zero_length, &sparse_tensor_format_id));
+  RETURN_NOT_OK(internal::GetSparseTensorMetadata(
+      metadata, &type, &shape, &dim_names, &non_zero_length, &sparse_tensor_format_id));
 
   auto message = flatbuf::GetMessage(metadata.data());
   auto sparse_tensor = reinterpret_cast<const flatbuf::SparseTensor*>(message->header());
@@ -816,15 +816,15 @@ Status ReadSparseTensor(const Buffer& metadata, io::RandomAccessFile* file,
   std::shared_ptr<SparseIndex> sparse_index;
   switch (sparse_tensor_format_id) {
     case SparseTensorFormat::COO:
-      RETURN_NOT_OK(
-          ReadSparseCOOIndex(sparse_tensor, shape.size(), non_zero_length, file, &sparse_index));
+      RETURN_NOT_OK(ReadSparseCOOIndex(sparse_tensor, shape.size(), non_zero_length, file,
+                                       &sparse_index));
       return MakeSparseTensorWithSparseCOOIndex(
           type, shape, dim_names, std::dynamic_pointer_cast<SparseCOOIndex>(sparse_index),
           non_zero_length, data, out);
 
     case SparseTensorFormat::CSR:
-      RETURN_NOT_OK(
-          ReadSparseCSRIndex(sparse_tensor, shape.size(), non_zero_length, file, &sparse_index));
+      RETURN_NOT_OK(ReadSparseCSRIndex(sparse_tensor, shape.size(), non_zero_length, file,
+                                       &sparse_index));
       return MakeSparseTensorWithSparseCSRIndex(
           type, shape, dim_names, std::dynamic_pointer_cast<SparseCSRIndex>(sparse_index),
           non_zero_length, data, out);
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index bfa00e487c9..8ce78c81c46 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -142,7 +142,9 @@ class ARROW_EXPORT SparseTensorBase {
   bool is_mutable() const { return data_->is_mutable(); }
 
   /// Total number of non-zero cells in the sparse tensor
-  int64_t non_zero_length() const { return sparse_index_ ? sparse_index_->non_zero_length() : 0; }
+  int64_t non_zero_length() const {
+    return sparse_index_ ? sparse_index_->non_zero_length() : 0;
+  }
 
   bool Equals(const SparseTensorBase& other) const;
 

From 3dd434c83c4a7ea113b716227bf95e4167cc540b Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 9 Jan 2019 17:03:33 +0900
Subject: [PATCH 32/40] Capitalize member function name

---
 cpp/src/arrow/compare.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 4230c24676f..f64428008db 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -787,7 +787,7 @@ namespace {
 
 template <typename LeftSparseIndexType, typename RightSparseIndexType>
 struct SparseTensorEqualsImpl {
-  static bool compare(const SparseTensor<LeftSparseIndexType>& left,
+  static bool Compare(const SparseTensor<LeftSparseIndexType>& left,
                       const SparseTensor<RightSparseIndexType>& right) {
     // TODO(mrkn): should we support the equality among different formats?
     return false;
@@ -796,7 +796,7 @@ struct SparseTensorEqualsImpl {
 
 template <typename SparseIndexType>
 struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
-  static bool compare(const SparseTensor<SparseIndexType>& left,
+  static bool Compare(const SparseTensor<SparseIndexType>& left,
                       const SparseTensor<SparseIndexType>& right) {
     DCHECK(left.type()->id() == right.type()->id());
     DCHECK(left.shape() == right.shape());
@@ -827,13 +827,13 @@ inline bool SparseTensorEqualsImplDispatch(const SparseTensor<SparseTensorType>&
   switch (right.sparse_tensor_format_id()) {
     case SparseTensorFormat::COO: {
       const auto& right_coo = checked_cast<const SparseTensor<SparseCOOIndex>&>(right);
-      return SparseTensorEqualsImpl<SparseTensorType, SparseCOOIndex>::compare(left,
+      return SparseTensorEqualsImpl<SparseTensorType, SparseCOOIndex>::Compare(left,
                                                                                right_coo);
     }
 
     case SparseTensorFormat::CSR: {
       const auto& right_csr = checked_cast<const SparseTensor<SparseCSRIndex>&>(right);
-      return SparseTensorEqualsImpl<SparseTensorType, SparseCSRIndex>::compare(left,
+      return SparseTensorEqualsImpl<SparseTensorType, SparseCSRIndex>::Compare(left,
                                                                                right_csr);
     }
 

From 97e85bd3535f7896164847a07a4d4e0b82d0cb81 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 9 Jan 2019 17:08:36 +0900
Subject: [PATCH 33/40] Use std::make_shared

---
 cpp/src/arrow/ipc/reader.cc | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index d930ab380c4..4aebccdb8b3 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -773,9 +773,7 @@ Status MakeSparseTensorWithSparseCOOIndex(
     const std::vector<std::string>& dim_names,
     const std::shared_ptr<SparseCOOIndex>& sparse_index, int64_t non_zero_length,
     const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
-  auto* sparse_tensor =
-      new SparseTensor<SparseCOOIndex>(sparse_index, type, data, shape, dim_names);
-  *out = std::shared_ptr<SparseTensorBase>(sparse_tensor);
+  *out = std::make_shared<SparseTensor<SparseCOOIndex>>(sparse_index, type, data, shape, dim_names);
   return Status::OK();
 }
 
@@ -784,9 +782,7 @@ Status MakeSparseTensorWithSparseCSRIndex(
     const std::vector<std::string>& dim_names,
     const std::shared_ptr<SparseCSRIndex>& sparse_index, int64_t non_zero_length,
     const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
-  auto* sparse_tensor =
-      new SparseTensor<SparseCSRIndex>(sparse_index, type, data, shape, dim_names);
-  *out = std::shared_ptr<SparseTensorBase>(sparse_tensor);
+  *out = std::make_shared<SparseTensor<SparseCSRIndex>>(sparse_index, type, data, shape, dim_names);
   return Status::OK();
 }
 

From 37a0a14c6b6232ddda0aa3648cff63c741c0e31d Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 9 Jan 2019 17:11:36 +0900
Subject: [PATCH 34/40] Remove needless function declaration

---
 cpp/src/arrow/ipc/reader.h | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index 6668418e7ca..0d49f84dbb7 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -244,14 +244,6 @@ Status ReadTensor(const Message& message, std::shared_ptr<Tensor>* out);
 ARROW_EXPORT
 Status ReadSparseTensor(io::InputStream* file, std::shared_ptr<SparseTensorBase>* out);
 
-/// \brief EXPERIMENTAL: Read arrow::Tensor from IPC message
-///
-/// \param[in] message a Message containing the tensor metadata and body
-/// \param[out] out the read tensor
-/// \return Status
-ARROW_EXPORT
-Status ReadTensor(const Message& message, std::shared_ptr<SparseTensorBase>* out);
-
 /// \brief EXPERIMENTAL: Read arrow::SparseTensor from IPC message
 ///
 /// \param[in] message a Message containing the tensor metadata and body

From 07a6518632203b3ab88be19d7ac46b17133d51f9 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 9 Jan 2019 17:13:21 +0900
Subject: [PATCH 35/40] Use copy-initialization instead of direct constructor call

---
 cpp/src/arrow/sparse_tensor-test.cc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index 21f9991fe82..502c746f27a 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -64,7 +64,7 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
   std::vector<int64_t> shape = {2, 3, 4};
   std::vector<int64_t> values = {1, 0,  2, 0,  0,  3, 0,  4, 5, 0,  6, 0,
                                  0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
-  std::shared_ptr<Buffer> buffer(Buffer::Wrap(values));
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
   std::vector<std::string> dim_names = {"foo", "bar", "baz"};
   NumericTensor<Int64Type> tensor1(buffer, shape);
   NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);
@@ -129,7 +129,7 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
   std::vector<int64_t> shape = {2, 3, 4};
   std::vector<int64_t> values = {1, 0,  2, 0,  0,  3, 0,  4, 5, 0,  6, 0,
                                  0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
-  std::shared_ptr<Buffer> buffer(Buffer::Wrap(values));
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
   std::vector<std::string> dim_names = {"foo", "bar", "baz"};
   Tensor tensor1(int64(), buffer, shape);
   Tensor tensor2(int64(), buffer, shape, {}, dim_names);
@@ -190,7 +190,7 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   std::vector<int64_t> shape = {6, 4};
   std::vector<int64_t> values = {1, 0,  2, 0,  0,  3, 0,  4, 5, 0,  6, 0,
                                  0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
-  std::shared_ptr<Buffer> buffer(Buffer::Wrap(values));
+  std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
   std::vector<std::string> dim_names = {"foo", "bar", "baz"};
   NumericTensor<Int64Type> tensor1(buffer, shape);
   NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);

From 90e8b316674f00cb403ec16aa3eaf74a59afa93d Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 9 Jan 2019 17:38:34 +0900
Subject: [PATCH 36/40] Rename sparse tensor classes

- SparseTensorBase to SparseTensor
- SparseTensor<...> to SparseTensorImpl<...>
---
 cpp/src/arrow/compare.cc               | 28 +++++++-------
 cpp/src/arrow/compare.h                |  6 +--
 cpp/src/arrow/ipc/metadata-internal.cc |  4 +-
 cpp/src/arrow/ipc/metadata-internal.h  |  4 +-
 cpp/src/arrow/ipc/read-write-test.cc   | 14 +++----
 cpp/src/arrow/ipc/reader.cc            | 14 +++----
 cpp/src/arrow/ipc/reader.h             |  6 +--
 cpp/src/arrow/ipc/writer.cc            |  8 ++--
 cpp/src/arrow/ipc/writer.h             |  4 +-
 cpp/src/arrow/sparse_tensor-test.cc    | 18 ++++-----
 cpp/src/arrow/sparse_tensor.cc         | 52 +++++++++++++-------------
 cpp/src/arrow/sparse_tensor.h          | 44 +++++++++++-----------
 cpp/src/arrow/tensor.h                 |  4 +-
 13 files changed, 103 insertions(+), 103 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index f64428008db..326aac44814 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -787,8 +787,8 @@ namespace {
 
 template <typename LeftSparseIndexType, typename RightSparseIndexType>
 struct SparseTensorEqualsImpl {
-  static bool Compare(const SparseTensor<LeftSparseIndexType>& left,
-                      const SparseTensor<RightSparseIndexType>& right) {
+  static bool Compare(const SparseTensorImpl<LeftSparseIndexType>& left,
+                      const SparseTensorImpl<RightSparseIndexType>& right) {
     // TODO(mrkn): should we support the equality among different formats?
     return false;
   }
@@ -796,8 +796,8 @@ struct SparseTensorEqualsImpl {
 
 template <typename SparseIndexType>
 struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
-  static bool Compare(const SparseTensor<SparseIndexType>& left,
-                      const SparseTensor<SparseIndexType>& right) {
+  static bool Compare(const SparseTensorImpl<SparseIndexType>& left,
+                      const SparseTensorImpl<SparseIndexType>& right) {
     DCHECK(left.type()->id() == right.type()->id());
     DCHECK(left.shape() == right.shape());
     DCHECK(left.non_zero_length() == right.non_zero_length());
@@ -821,19 +821,19 @@ struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
   }
 };
 
-template <typename SparseTensorType>
-inline bool SparseTensorEqualsImplDispatch(const SparseTensor<SparseTensorType>& left,
-                                           const SparseTensorBase& right) {
+template <typename SparseIndexType>
+inline bool SparseTensorEqualsImplDispatch(const SparseTensorImpl<SparseIndexType>& left,
+                                           const SparseTensor& right) {
   switch (right.sparse_tensor_format_id()) {
     case SparseTensorFormat::COO: {
-      const auto& right_coo = checked_cast<const SparseTensor<SparseCOOIndex>&>(right);
-      return SparseTensorEqualsImpl<SparseTensorType, SparseCOOIndex>::Compare(left,
+      const auto& right_coo = checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(right);
+      return SparseTensorEqualsImpl<SparseIndexType, SparseCOOIndex>::Compare(left,
                                                                                right_coo);
     }
 
     case SparseTensorFormat::CSR: {
-      const auto& right_csr = checked_cast<const SparseTensor<SparseCSRIndex>&>(right);
-      return SparseTensorEqualsImpl<SparseTensorType, SparseCSRIndex>::Compare(left,
+      const auto& right_csr = checked_cast<const SparseTensorImpl<SparseCSRIndex>&>(right);
+      return SparseTensorEqualsImpl<SparseIndexType, SparseCSRIndex>::Compare(left,
                                                                                right_csr);
     }
 
@@ -844,7 +844,7 @@ inline bool SparseTensorEqualsImplDispatch(const SparseTensor<SparseTensorType>&
 
 }  // namespace
 
-bool SparseTensorEquals(const SparseTensorBase& left, const SparseTensorBase& right) {
+bool SparseTensorEquals(const SparseTensor& left, const SparseTensor& right) {
   if (&left == &right) {
     return true;
   } else if (left.type()->id() != right.type()->id()) {
@@ -859,12 +859,12 @@ bool SparseTensorEquals(const SparseTensorBase& left, const SparseTensorBase& ri
 
   switch (left.sparse_tensor_format_id()) {
     case SparseTensorFormat::COO: {
-      const auto& left_coo = checked_cast<const SparseTensor<SparseCOOIndex>&>(left);
+      const auto& left_coo = checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(left);
       return SparseTensorEqualsImplDispatch(left_coo, right);
     }
 
     case SparseTensorFormat::CSR: {
-      const auto& left_csr = checked_cast<const SparseTensor<SparseCSRIndex>&>(left);
+      const auto& left_csr = checked_cast<const SparseTensorImpl<SparseCSRIndex>&>(left);
       return SparseTensorEqualsImplDispatch(left_csr, right);
     }
 
diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h
index 6067b7929ab..331e81bfd3a 100644
--- a/cpp/src/arrow/compare.h
+++ b/cpp/src/arrow/compare.h
@@ -29,7 +29,7 @@ namespace arrow {
 class Array;
 class DataType;
 class Tensor;
-class SparseTensorBase;
+class SparseTensor;
 
 /// Returns true if the arrays are exactly equal
 bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right);
@@ -37,8 +37,8 @@ bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right);
 bool ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right);
 
 /// EXPERIMENTAL: Returns true if the given sparse tensors are exactly equal
-bool ARROW_EXPORT SparseTensorEquals(const SparseTensorBase& left,
-                                     const SparseTensorBase& right);
+bool ARROW_EXPORT SparseTensorEquals(const SparseTensor& left,
+                                     const SparseTensor& right);
 
 /// Returns true if the arrays are approximately equal. For non-floating point
 /// types, this is equivalent to ArrayEquals(left, right)
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index aafa2e3fe82..8bc6623551b 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -836,7 +836,7 @@ Status MakeSparseTensorIndex(FBB& fbb, const SparseIndex& sparse_index,
   return Status::OK();
 }
 
-Status MakeSparseTensor(FBB& fbb, const SparseTensorBase& sparse_tensor,
+Status MakeSparseTensor(FBB& fbb, const SparseTensor& sparse_tensor,
                         int64_t body_length, const std::vector<BufferMetadata>& buffers,
                         SparseTensorOffset* offset) {
   flatbuf::Type fb_type_type;
@@ -872,7 +872,7 @@ Status MakeSparseTensor(FBB& fbb, const SparseTensorBase& sparse_tensor,
   return Status::OK();
 }
 
-Status WriteSparseTensorMessage(const SparseTensorBase& sparse_tensor,
+Status WriteSparseTensorMessage(const SparseTensor& sparse_tensor,
                                 int64_t body_length,
                                 const std::vector<BufferMetadata>& buffers,
                                 std::shared_ptr<Buffer>* out) {
diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h
index bff3dd02231..74a1aef8580 100644
--- a/cpp/src/arrow/ipc/metadata-internal.h
+++ b/cpp/src/arrow/ipc/metadata-internal.h
@@ -41,7 +41,7 @@ namespace arrow {
 class DataType;
 class Schema;
 class Tensor;
-class SparseTensorBase;
+class SparseTensor;
 
 namespace flatbuf = org::apache::arrow::flatbuf;
 
@@ -145,7 +145,7 @@ Status WriteRecordBatchMessage(const int64_t length, const int64_t body_length,
 Status WriteTensorMessage(const Tensor& tensor, const int64_t buffer_start_offset,
                           std::shared_ptr<Buffer>* out);
 
-Status WriteSparseTensorMessage(const SparseTensorBase& sparse_tensor,
+Status WriteSparseTensorMessage(const SparseTensor& sparse_tensor,
                                 int64_t body_length,
                                 const std::vector<BufferMetadata>& buffers,
                                 std::shared_ptr<Buffer>* out);
diff --git a/cpp/src/arrow/ipc/read-write-test.cc b/cpp/src/arrow/ipc/read-write-test.cc
index 820708dcaa3..bc27386f34f 100644
--- a/cpp/src/arrow/ipc/read-write-test.cc
+++ b/cpp/src/arrow/ipc/read-write-test.cc
@@ -851,14 +851,14 @@ class TestSparseTensorRoundTrip : public ::testing::Test, public IpcTestFixture
   void TearDown() { io::MemoryMapFixture::TearDown(); }
 
   template <typename SparseIndexType>
-  void CheckSparseTensorRoundTrip(const SparseTensor<SparseIndexType>& tensor) {
+  void CheckSparseTensorRoundTrip(const SparseTensorImpl<SparseIndexType>& tensor) {
     GTEST_FAIL();
   }
 };
 
 template <>
 void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCOOIndex>(
-    const SparseTensor<SparseCOOIndex>& tensor) {
+    const SparseTensorImpl<SparseCOOIndex>& tensor) {
   const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
   const int elem_size = type.bit_width() / 8;
 
@@ -878,7 +878,7 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCOOIndex>(
 
   ASSERT_OK(mmap_->Seek(0));
 
-  std::shared_ptr<SparseTensorBase> result;
+  std::shared_ptr<SparseTensor> result;
   ASSERT_OK(ReadSparseTensor(mmap_.get(), &result));
 
   const auto& resulted_sparse_index =
@@ -890,7 +890,7 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCOOIndex>(
 
 template <>
 void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCSRIndex>(
-    const SparseTensor<SparseCSRIndex>& tensor) {
+    const SparseTensorImpl<SparseCSRIndex>& tensor) {
   const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
   const int elem_size = type.bit_width() / 8;
 
@@ -911,7 +911,7 @@ void TestSparseTensorRoundTrip::CheckSparseTensorRoundTrip<SparseCSRIndex>(
 
   ASSERT_OK(mmap_->Seek(0));
 
-  std::shared_ptr<SparseTensorBase> result;
+  std::shared_ptr<SparseTensor> result;
   ASSERT_OK(ReadSparseTensor(mmap_.get(), &result));
 
   const auto& resulted_sparse_index =
@@ -934,7 +934,7 @@ TEST_F(TestSparseTensorRoundTrip, WithSparseCOOIndex) {
 
   auto data = Buffer::Wrap(values);
   NumericTensor<Int64Type> t(data, shape, {}, dim_names);
-  SparseTensor<SparseCOOIndex> st(t);
+  SparseTensorImpl<SparseCOOIndex> st(t);
 
   CheckSparseTensorRoundTrip(st);
 }
@@ -951,7 +951,7 @@ TEST_F(TestSparseTensorRoundTrip, WithSparseCSRIndex) {
 
   auto data = Buffer::Wrap(values);
   NumericTensor<Int64Type> t(data, shape, {}, dim_names);
-  SparseTensor<SparseCSRIndex> st(t);
+  SparseTensorImpl<SparseCSRIndex> st(t);
 
   CheckSparseTensorRoundTrip(st);
 }
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 4aebccdb8b3..1207427806b 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -772,8 +772,8 @@ Status MakeSparseTensorWithSparseCOOIndex(
     const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
     const std::vector<std::string>& dim_names,
     const std::shared_ptr<SparseCOOIndex>& sparse_index, int64_t non_zero_length,
-    const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
-  *out = std::make_shared<SparseTensor<SparseCOOIndex>>(sparse_index, type, data, shape, dim_names);
+    const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensor>* out) {
+  *out = std::make_shared<SparseTensorImpl<SparseCOOIndex>>(sparse_index, type, data, shape, dim_names);
   return Status::OK();
 }
 
@@ -781,15 +781,15 @@ Status MakeSparseTensorWithSparseCSRIndex(
     const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
     const std::vector<std::string>& dim_names,
     const std::shared_ptr<SparseCSRIndex>& sparse_index, int64_t non_zero_length,
-    const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensorBase>* out) {
-  *out = std::make_shared<SparseTensor<SparseCSRIndex>>(sparse_index, type, data, shape, dim_names);
+    const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensor>* out) {
+  *out = std::make_shared<SparseTensorImpl<SparseCSRIndex>>(sparse_index, type, data, shape, dim_names);
   return Status::OK();
 }
 
 }  // namespace
 
 Status ReadSparseTensor(const Buffer& metadata, io::RandomAccessFile* file,
-                        std::shared_ptr<SparseTensorBase>* out) {
+                        std::shared_ptr<SparseTensor>* out) {
   std::shared_ptr<DataType> type;
   std::vector<int64_t> shape;
   std::vector<std::string> dim_names;
@@ -830,12 +830,12 @@ Status ReadSparseTensor(const Buffer& metadata, io::RandomAccessFile* file,
   }
 }
 
-Status ReadSparseTensor(const Message& message, std::shared_ptr<SparseTensorBase>* out) {
+Status ReadSparseTensor(const Message& message, std::shared_ptr<SparseTensor>* out) {
   io::BufferReader buffer_reader(message.body());
   return ReadSparseTensor(*message.metadata(), &buffer_reader, out);
 }
 
-Status ReadSparseTensor(io::InputStream* file, std::shared_ptr<SparseTensorBase>* out) {
+Status ReadSparseTensor(io::InputStream* file, std::shared_ptr<SparseTensor>* out) {
   std::unique_ptr<Message> message;
   RETURN_NOT_OK(ReadContiguousPayload(file, &message));
   DCHECK_EQ(message->type(), Message::SPARSE_TENSOR);
diff --git a/cpp/src/arrow/ipc/reader.h b/cpp/src/arrow/ipc/reader.h
index 0d49f84dbb7..ebecea13ffb 100644
--- a/cpp/src/arrow/ipc/reader.h
+++ b/cpp/src/arrow/ipc/reader.h
@@ -33,7 +33,7 @@ class Buffer;
 class Schema;
 class Status;
 class Tensor;
-class SparseTensorBase;
+class SparseTensor;
 
 namespace io {
 
@@ -242,7 +242,7 @@ Status ReadTensor(const Message& message, std::shared_ptr<Tensor>* out);
 /// \param[out] out the read sparse tensor
 /// \return Status
 ARROW_EXPORT
-Status ReadSparseTensor(io::InputStream* file, std::shared_ptr<SparseTensorBase>* out);
+Status ReadSparseTensor(io::InputStream* file, std::shared_ptr<SparseTensor>* out);
 
 /// \brief EXPERIMENTAL: Read arrow::SparseTensor from IPC message
 ///
@@ -250,7 +250,7 @@ Status ReadSparseTensor(io::InputStream* file, std::shared_ptr<SparseTensorBase>
 /// \param[out] out the read sparse tensor
 /// \return Status
 ARROW_EXPORT
-Status ReadSparseTensor(const Message& message, std::shared_ptr<SparseTensorBase>* out);
+Status ReadSparseTensor(const Message& message, std::shared_ptr<SparseTensor>* out);
 
 }  // namespace ipc
 }  // namespace arrow
diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc
index cd1d2773c0b..0bf68142c77 100644
--- a/cpp/src/arrow/ipc/writer.cc
+++ b/cpp/src/arrow/ipc/writer.cc
@@ -703,12 +703,12 @@ class SparseTensorSerializer {
     return Status::OK();
   }
 
-  Status SerializeMetadata(const SparseTensorBase& sparse_tensor) {
+  Status SerializeMetadata(const SparseTensor& sparse_tensor) {
     return WriteSparseTensorMessage(sparse_tensor, out_->body_length, buffer_meta_,
                                     &out_->metadata);
   }
 
-  Status Assemble(const SparseTensorBase& sparse_tensor) {
+  Status Assemble(const SparseTensor& sparse_tensor) {
     if (buffer_meta_.size() > 0) {
       buffer_meta_.clear();
       out_->body_buffers.clear();
@@ -753,7 +753,7 @@ class SparseTensorSerializer {
   int64_t buffer_start_offset_;
 };
 
-Status GetSparseTensorPayload(const SparseTensorBase& sparse_tensor, MemoryPool* pool,
+Status GetSparseTensorPayload(const SparseTensor& sparse_tensor, MemoryPool* pool,
                               IpcPayload* out) {
   SparseTensorSerializer writer(0, out);
   return writer.Assemble(sparse_tensor);
@@ -761,7 +761,7 @@ Status GetSparseTensorPayload(const SparseTensorBase& sparse_tensor, MemoryPool*
 
 }  // namespace internal
 
-Status WriteSparseTensor(const SparseTensorBase& sparse_tensor, io::OutputStream* dst,
+Status WriteSparseTensor(const SparseTensor& sparse_tensor, io::OutputStream* dst,
                          int32_t* metadata_length, int64_t* body_length,
                          MemoryPool* pool) {
   internal::IpcPayload payload;
diff --git a/cpp/src/arrow/ipc/writer.h b/cpp/src/arrow/ipc/writer.h
index 56c1672065c..5feb9e90cb0 100644
--- a/cpp/src/arrow/ipc/writer.h
+++ b/cpp/src/arrow/ipc/writer.h
@@ -36,7 +36,7 @@ class Schema;
 class Status;
 class Table;
 class Tensor;
-class SparseTensorBase;
+class SparseTensor;
 
 namespace io {
 
@@ -280,7 +280,7 @@ Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadat
 // \param[out] metadata_length the actual metadata length, including padding
 // \param[out] body_length the actual message body length
 ARROW_EXPORT
-Status WriteSparseTensor(const SparseTensorBase& sparse_tensor, io::OutputStream* dst,
+Status WriteSparseTensor(const SparseTensor& sparse_tensor, io::OutputStream* dst,
                          int32_t* metadata_length, int64_t* body_length,
                          MemoryPool* pool);
 
diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index 502c746f27a..2e4cd2d0336 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -33,17 +33,17 @@
 namespace arrow {
 
 static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected,
-                                              const SparseTensorBase& sparse_tensor) {
+                                              const SparseTensor& sparse_tensor) {
   ASSERT_EQ(expected, sparse_tensor.sparse_tensor_format_id());
   ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id());
 }
 
 TEST(TestSparseCOOTensor, CreationEmptyTensor) {
   std::vector<int64_t> shape = {2, 3, 4};
-  SparseTensor<SparseCOOIndex> st1(int64(), shape);
+  SparseTensorImpl<SparseCOOIndex> st1(int64(), shape);
 
   std::vector<std::string> dim_names = {"foo", "bar", "baz"};
-  SparseTensor<SparseCOOIndex> st2(int64(), shape, dim_names);
+  SparseTensorImpl<SparseCOOIndex> st2(int64(), shape, dim_names);
 
   ASSERT_EQ(0, st1.non_zero_length());
   ASSERT_EQ(0, st2.non_zero_length());
@@ -68,8 +68,8 @@ TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
   std::vector<std::string> dim_names = {"foo", "bar", "baz"};
   NumericTensor<Int64Type> tensor1(buffer, shape);
   NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);
-  SparseTensor<SparseCOOIndex> st1(tensor1);
-  SparseTensor<SparseCOOIndex> st2(tensor2);
+  SparseTensorImpl<SparseCOOIndex> st1(tensor1);
+  SparseTensorImpl<SparseCOOIndex> st2(tensor2);
 
   CheckSparseIndexFormatType(SparseTensorFormat::COO, st1);
 
@@ -133,8 +133,8 @@ TEST(TestSparseCOOTensor, CreationFromTensor) {
   std::vector<std::string> dim_names = {"foo", "bar", "baz"};
   Tensor tensor1(int64(), buffer, shape);
   Tensor tensor2(int64(), buffer, shape, {}, dim_names);
-  SparseTensor<SparseCOOIndex> st1(tensor1);
-  SparseTensor<SparseCOOIndex> st2(tensor2);
+  SparseTensorImpl<SparseCOOIndex> st1(tensor1);
+  SparseTensorImpl<SparseCOOIndex> st2(tensor2);
 
   ASSERT_EQ(12, st1.non_zero_length());
   ASSERT_TRUE(st1.is_mutable());
@@ -195,8 +195,8 @@ TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
   NumericTensor<Int64Type> tensor1(buffer, shape);
   NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);
 
-  SparseTensor<SparseCSRIndex> st1(tensor1);
-  SparseTensor<SparseCSRIndex> st2(tensor2);
+  SparseTensorImpl<SparseCSRIndex> st1(tensor1);
+  SparseTensorImpl<SparseCSRIndex> st2(tensor2);
 
   CheckSparseIndexFormatType(SparseTensorFormat::CSR, st1);
 
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index 896750c9ca5..eab7cacc211 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -307,10 +307,10 @@ SparseCSRIndex::SparseCSRIndex(const std::shared_ptr<IndexTensor>& indptr,
 std::string SparseCSRIndex::ToString() const { return std::string("SparseCSRIndex"); }
 
 // ----------------------------------------------------------------------
-// SparseTensorBase
+// SparseTensor
 
 // Constructor with all attributes
-SparseTensorBase::SparseTensorBase(const std::shared_ptr<DataType>& type,
+SparseTensor::SparseTensor(const std::shared_ptr<DataType>& type,
                                    const std::shared_ptr<Buffer>& data,
                                    const std::vector<int64_t>& shape,
                                    const std::shared_ptr<SparseIndex>& sparse_index,
@@ -323,7 +323,7 @@ SparseTensorBase::SparseTensorBase(const std::shared_ptr<DataType>& type,
   DCHECK(is_tensor_supported(type->id()));
 }
 
-const std::string& SparseTensorBase::dim_name(int i) const {
+const std::string& SparseTensor::dim_name(int i) const {
   static const std::string kEmpty = "";
   if (dim_names_.size() == 0) {
     return kEmpty;
@@ -333,29 +333,29 @@ const std::string& SparseTensorBase::dim_name(int i) const {
   }
 }
 
-int64_t SparseTensorBase::size() const {
+int64_t SparseTensor::size() const {
   return std::accumulate(shape_.begin(), shape_.end(), 1LL, std::multiplies<int64_t>());
 }
 
-bool SparseTensorBase::Equals(const SparseTensorBase& other) const {
+bool SparseTensor::Equals(const SparseTensor& other) const {
   return SparseTensorEquals(*this, other);
 }
 
 // ----------------------------------------------------------------------
-// SparseTensor
+// SparseTensorImpl
 
 // Constructor with a dense tensor
 template <typename SparseIndexType>
-SparseTensor<SparseIndexType>::SparseTensor(const std::shared_ptr<DataType>& type,
-                                            const std::vector<int64_t>& shape,
-                                            const std::vector<std::string>& dim_names)
-    : SparseTensor(nullptr, type, nullptr, shape, dim_names) {}
+SparseTensorImpl<SparseIndexType>::SparseTensorImpl(const std::shared_ptr<DataType>& type,
+                                                    const std::vector<int64_t>& shape,
+                                                    const std::vector<std::string>& dim_names)
+    : SparseTensorImpl(nullptr, type, nullptr, shape, dim_names) {}
 
 // Constructor with a dense tensor
 template <typename SparseIndexType>
 template <typename TYPE>
-SparseTensor<SparseIndexType>::SparseTensor(const NumericTensor<TYPE>& tensor)
-    : SparseTensor(nullptr, tensor.type(), nullptr, tensor.shape(), tensor.dim_names_) {
+SparseTensorImpl<SparseIndexType>::SparseTensorImpl(const NumericTensor<TYPE>& tensor)
+    : SparseTensorImpl(nullptr, tensor.type(), nullptr, tensor.shape(), tensor.dim_names_) {
   SparseTensorConverter<TYPE, SparseIndexType> converter(tensor);
   DCHECK_OK(converter.Convert());
   sparse_index_ = converter.sparse_index;
@@ -364,8 +364,8 @@ SparseTensor<SparseIndexType>::SparseTensor(const NumericTensor<TYPE>& tensor)
 
 // Constructor with a dense tensor
 template <typename SparseIndexType>
-SparseTensor<SparseIndexType>::SparseTensor(const Tensor& tensor)
-    : SparseTensor(nullptr, tensor.type(), nullptr, tensor.shape(), tensor.dim_names_) {
+SparseTensorImpl<SparseIndexType>::SparseTensorImpl(const Tensor& tensor)
+    : SparseTensorImpl(nullptr, tensor.type(), nullptr, tensor.shape(), tensor.dim_names_) {
   switch (tensor.type()->id()) {
     case Type::UINT8:
       MakeSparseTensorFromTensor<UInt8Type, SparseIndexType>(tensor, &sparse_index_,
@@ -420,28 +420,28 @@ SparseTensor<SparseIndexType>::SparseTensor(const Tensor& tensor)
 // Instantiate templates
 
 #define INSTANTIATE_SPARSE_TENSOR(IndexType)                    \
-  template class ARROW_TEMPLATE_EXPORT SparseTensor<IndexType>; \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorImpl<IndexType>; \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<UInt8Type>&);                         \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<UInt16Type>&);                        \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<UInt32Type>&);                        \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<UInt64Type>&);                        \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<Int8Type>&);                          \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<Int16Type>&);                         \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<Int32Type>&);                         \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<Int64Type>&);                         \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<HalfFloatType>&);                     \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<FloatType>&);                         \
-  template ARROW_EXPORT SparseTensor<IndexType>::SparseTensor(  \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
       const NumericTensor<DoubleType>&)
 
 INSTANTIATE_SPARSE_TENSOR(SparseCOOIndex);
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index 8ce78c81c46..a6f69240394 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -110,12 +110,12 @@ class ARROW_EXPORT SparseCSRIndex : public SparseIndexBase<SparseCSRIndex> {
 };
 
 // ----------------------------------------------------------------------
-// SparseTensorBase class
+// SparseTensor class
 
 /// \brief EXPERIMENTAL: The base class of sparse tensor container
-class ARROW_EXPORT SparseTensorBase {
+class ARROW_EXPORT SparseTensor {
  public:
-  virtual ~SparseTensorBase() = default;
+  virtual ~SparseTensor() = default;
 
   SparseTensorFormat::type sparse_tensor_format_id() const {
     return sparse_index_->format_id();
@@ -146,14 +146,14 @@ class ARROW_EXPORT SparseTensorBase {
     return sparse_index_ ? sparse_index_->non_zero_length() : 0;
   }
 
-  bool Equals(const SparseTensorBase& other) const;
+  bool Equals(const SparseTensor& other) const;
 
  protected:
   // Constructor with all attributes
-  SparseTensorBase(const std::shared_ptr<DataType>& type,
-                   const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
-                   const std::shared_ptr<SparseIndex>& sparse_index,
-                   const std::vector<std::string>& dim_names);
+  SparseTensor(const std::shared_ptr<DataType>& type,
+               const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+               const std::shared_ptr<SparseIndex>& sparse_index,
+               const std::vector<std::string>& dim_names);
 
   std::shared_ptr<DataType> type_;
   std::shared_ptr<Buffer> data_;
@@ -165,34 +165,34 @@ class ARROW_EXPORT SparseTensorBase {
 };
 
 // ----------------------------------------------------------------------
-// SparseTensor class
+// SparseTensorImpl class
 
-/// \brief EXPERIMENTAL: Concrete sparse tensor classes with sparse index type
+/// \brief EXPERIMENTAL: Concrete sparse tensor implementation classes with sparse index type
 template <typename SparseIndexType>
-class ARROW_EXPORT SparseTensor : public SparseTensorBase {
+class ARROW_EXPORT SparseTensorImpl : public SparseTensor {
  public:
-  virtual ~SparseTensor() = default;
+  virtual ~SparseTensorImpl() = default;
 
   // Constructor with all attributes
-  SparseTensor(const std::shared_ptr<SparseIndexType>& sparse_index,
-               const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
-               const std::vector<int64_t>& shape,
-               const std::vector<std::string>& dim_names)
-      : SparseTensorBase(type, data, shape, sparse_index, dim_names) {}
+  SparseTensorImpl(const std::shared_ptr<SparseIndexType>& sparse_index,
+                   const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+                   const std::vector<int64_t>& shape,
+                   const std::vector<std::string>& dim_names)
+      : SparseTensor(type, data, shape, sparse_index, dim_names) {}
 
   // Constructor for empty sparse tensor
-  SparseTensor(const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
-               const std::vector<std::string>& dim_names = {});
+  SparseTensorImpl(const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+                   const std::vector<std::string>& dim_names = {});
 
   // Constructor with a dense numeric tensor
   template <typename TYPE>
-  explicit SparseTensor(const NumericTensor<TYPE>& tensor);
+  explicit SparseTensorImpl(const NumericTensor<TYPE>& tensor);
 
   // Constructor with a dense tensor
-  explicit SparseTensor(const Tensor& tensor);
+  explicit SparseTensorImpl(const Tensor& tensor);
 
  private:
-  ARROW_DISALLOW_COPY_AND_ASSIGN(SparseTensor);
+  ARROW_DISALLOW_COPY_AND_ASSIGN(SparseTensorImpl);
 };
 
 }  // namespace arrow
diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
index e386b096037..e81f0f0dff5 100644
--- a/cpp/src/arrow/tensor.h
+++ b/cpp/src/arrow/tensor.h
@@ -51,7 +51,7 @@ static inline bool is_tensor_supported(Type::type type_id) {
 }
 
 template <typename SparseIndexType>
-class SparseTensor;
+class SparseTensorImpl;
 
 class ARROW_EXPORT Tensor {
  public:
@@ -114,7 +114,7 @@ class ARROW_EXPORT Tensor {
   std::vector<std::string> dim_names_;
 
   template <typename SparseIndexType>
-  friend class SparseTensor;
+  friend class SparseTensorImpl;
 
  private:
   ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);

From c83ea6aafc3ac7ade6d67a19027a7f4303d9f77c Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 9 Jan 2019 17:42:14 +0900
Subject: [PATCH 37/40] Add type aliases for sparse tensor types

---
 cpp/src/arrow/sparse_tensor.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index a6f69240394..a60f533bf81 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -195,6 +195,13 @@ class ARROW_EXPORT SparseTensorImpl : public SparseTensor {
   ARROW_DISALLOW_COPY_AND_ASSIGN(SparseTensorImpl);
 };
 
+/// \brief EXPERIMENTAL: Type alias for COO sparse tensor
+using SparseTensorCOO = SparseTensorImpl<SparseCOOIndex>;
+
+/// \brief EXPERIMENTAL: Type alias for CSR sparse matrix
+using SparseTensorCSR = SparseTensorImpl<SparseCSRIndex>;
+using SparseMatrixCSR = SparseTensorImpl<SparseCSRIndex>;
+
 }  // namespace arrow
 
 #endif  // ARROW_SPARSE_TENSOR_H

From 880bbc4eb1a47992ea542e48ae67f8fe9e2ae0ac Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 9 Jan 2019 17:48:45 +0900
Subject: [PATCH 38/40] Rename too-verbose function name

sparse_tensor_format_id -> format_id
---
 cpp/src/arrow/compare.cc            | 4 ++--
 cpp/src/arrow/sparse_tensor-test.cc | 2 +-
 cpp/src/arrow/sparse_tensor.h       | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 326aac44814..4d218c6f544 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -824,7 +824,7 @@ struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
 template <typename SparseIndexType>
 inline bool SparseTensorEqualsImplDispatch(const SparseTensorImpl<SparseIndexType>& left,
                                            const SparseTensor& right) {
-  switch (right.sparse_tensor_format_id()) {
+  switch (right.format_id()) {
     case SparseTensorFormat::COO: {
       const auto& right_coo = checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(right);
       return SparseTensorEqualsImpl<SparseIndexType, SparseCOOIndex>::Compare(left,
@@ -857,7 +857,7 @@ bool SparseTensorEquals(const SparseTensor& left, const SparseTensor& right) {
     return false;
   }
 
-  switch (left.sparse_tensor_format_id()) {
+  switch (left.format_id()) {
     case SparseTensorFormat::COO: {
       const auto& left_coo = checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(left);
       return SparseTensorEqualsImplDispatch(left_coo, right);
diff --git a/cpp/src/arrow/sparse_tensor-test.cc b/cpp/src/arrow/sparse_tensor-test.cc
index 2e4cd2d0336..d48f2d0229d 100644
--- a/cpp/src/arrow/sparse_tensor-test.cc
+++ b/cpp/src/arrow/sparse_tensor-test.cc
@@ -34,7 +34,7 @@ namespace arrow {
 
 static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected,
                                               const SparseTensor& sparse_tensor) {
-  ASSERT_EQ(expected, sparse_tensor.sparse_tensor_format_id());
+  ASSERT_EQ(expected, sparse_tensor.format_id());
   ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id());
 }
 
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index a60f533bf81..8ace9979060 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -117,7 +117,7 @@ class ARROW_EXPORT SparseTensor {
  public:
   virtual ~SparseTensor() = default;
 
-  SparseTensorFormat::type sparse_tensor_format_id() const {
+  SparseTensorFormat::type format_id() const {
     return sparse_index_->format_id();
   }
 

From d57e56fc6c454441b231fb8980d94f5491133a85 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 9 Jan 2019 17:52:15 +0900
Subject: [PATCH 39/40] Merge sparse_tensor_format.h into sparse_tensor.h

---
 cpp/src/arrow/ipc/metadata-internal.h |  2 +-
 cpp/src/arrow/sparse_tensor.h         |  6 +++++-
 cpp/src/arrow/sparse_tensor_format.h  | 26 --------------------------
 3 files changed, 6 insertions(+), 28 deletions(-)
 delete mode 100644 cpp/src/arrow/sparse_tensor_format.h

diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h
index 74a1aef8580..4df8050cddb 100644
--- a/cpp/src/arrow/ipc/metadata-internal.h
+++ b/cpp/src/arrow/ipc/metadata-internal.h
@@ -33,7 +33,7 @@
 #include "arrow/ipc/dictionary.h"  // IYWU pragma: keep
 #include "arrow/ipc/message.h"
 #include "arrow/memory_pool.h"
-#include "arrow/sparse_tensor_format.h"
+#include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index 8ace9979060..69505388330 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -22,7 +22,6 @@
 #include <string>
 #include <vector>
 
-#include "arrow/sparse_tensor_format.h"
 #include "arrow/tensor.h"
 
 namespace arrow {
@@ -30,6 +29,11 @@ namespace arrow {
 // ----------------------------------------------------------------------
 // SparseIndex class
 
+/// \brief EXPERIMENTAL: Sparse tensor format enumeration
+struct SparseTensorFormat {
+  enum type { COO, CSR };
+};
+
 /// \brief EXPERIMENTAL: The base class for representing index of non-zero
 /// values in sparse tensor
 class ARROW_EXPORT SparseIndex {
diff --git a/cpp/src/arrow/sparse_tensor_format.h b/cpp/src/arrow/sparse_tensor_format.h
deleted file mode 100644
index 813378ff0e9..00000000000
--- a/cpp/src/arrow/sparse_tensor_format.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef ARROW_SPARSE_TENSOR_FORMAT_H
-#define ARROW_SPARSE_TENSOR_FORMAT_H
-
-/// \brief EXPERIMENTAL: Sparse tensor format enumeration
-struct SparseTensorFormat {
-  enum type { COO, CSR };
-};
-
-#endif  // ARROW_SPARSE_TENSOR_FORMAT_H

From 148bff82231d9609dfc446e0541a187abf3fb607 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 9 Jan 2019 17:53:00 +0900
Subject: [PATCH 40/40] Reformat sources with make format

---
 cpp/src/arrow/compare.cc               | 10 ++--
 cpp/src/arrow/compare.h                |  3 +-
 cpp/src/arrow/ipc/metadata-internal.cc |  7 ++-
 cpp/src/arrow/ipc/metadata-internal.h  |  3 +-
 cpp/src/arrow/ipc/reader.cc            |  6 ++-
 cpp/src/arrow/sparse_tensor.cc         | 66 +++++++++++++-------------
 cpp/src/arrow/sparse_tensor.h          | 18 +++----
 7 files changed, 58 insertions(+), 55 deletions(-)

diff --git a/cpp/src/arrow/compare.cc b/cpp/src/arrow/compare.cc
index 4d218c6f544..114752934c9 100644
--- a/cpp/src/arrow/compare.cc
+++ b/cpp/src/arrow/compare.cc
@@ -826,15 +826,17 @@ inline bool SparseTensorEqualsImplDispatch(const SparseTensorImpl<SparseIndexTyp
                                            const SparseTensor& right) {
   switch (right.format_id()) {
     case SparseTensorFormat::COO: {
-      const auto& right_coo = checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(right);
+      const auto& right_coo =
+          checked_cast<const SparseTensorImpl<SparseCOOIndex>&>(right);
       return SparseTensorEqualsImpl<SparseIndexType, SparseCOOIndex>::Compare(left,
-                                                                               right_coo);
+                                                                              right_coo);
     }
 
     case SparseTensorFormat::CSR: {
-      const auto& right_csr = checked_cast<const SparseTensorImpl<SparseCSRIndex>&>(right);
+      const auto& right_csr =
+          checked_cast<const SparseTensorImpl<SparseCSRIndex>&>(right);
       return SparseTensorEqualsImpl<SparseIndexType, SparseCSRIndex>::Compare(left,
-                                                                               right_csr);
+                                                                              right_csr);
     }
 
     default:
diff --git a/cpp/src/arrow/compare.h b/cpp/src/arrow/compare.h
index 331e81bfd3a..d49d7cc0fdb 100644
--- a/cpp/src/arrow/compare.h
+++ b/cpp/src/arrow/compare.h
@@ -37,8 +37,7 @@ bool ARROW_EXPORT ArrayEquals(const Array& left, const Array& right);
 bool ARROW_EXPORT TensorEquals(const Tensor& left, const Tensor& right);
 
 /// EXPERIMENTAL: Returns true if the given sparse tensors are exactly equal
-bool ARROW_EXPORT SparseTensorEquals(const SparseTensor& left,
-                                     const SparseTensor& right);
+bool ARROW_EXPORT SparseTensorEquals(const SparseTensor& left, const SparseTensor& right);
 
 /// Returns true if the arrays are approximately equal. For non-floating point
 /// types, this is equivalent to ArrayEquals(left, right)
diff --git a/cpp/src/arrow/ipc/metadata-internal.cc b/cpp/src/arrow/ipc/metadata-internal.cc
index 8bc6623551b..da6711395f8 100644
--- a/cpp/src/arrow/ipc/metadata-internal.cc
+++ b/cpp/src/arrow/ipc/metadata-internal.cc
@@ -836,8 +836,8 @@ Status MakeSparseTensorIndex(FBB& fbb, const SparseIndex& sparse_index,
   return Status::OK();
 }
 
-Status MakeSparseTensor(FBB& fbb, const SparseTensor& sparse_tensor,
-                        int64_t body_length, const std::vector<BufferMetadata>& buffers,
+Status MakeSparseTensor(FBB& fbb, const SparseTensor& sparse_tensor, int64_t body_length,
+                        const std::vector<BufferMetadata>& buffers,
                         SparseTensorOffset* offset) {
   flatbuf::Type fb_type_type;
   Offset fb_type;
@@ -872,8 +872,7 @@ Status MakeSparseTensor(FBB& fbb, const SparseTensor& sparse_tensor,
   return Status::OK();
 }
 
-Status WriteSparseTensorMessage(const SparseTensor& sparse_tensor,
-                                int64_t body_length,
+Status WriteSparseTensorMessage(const SparseTensor& sparse_tensor, int64_t body_length,
                                 const std::vector<BufferMetadata>& buffers,
                                 std::shared_ptr<Buffer>* out) {
   FBB fbb;
diff --git a/cpp/src/arrow/ipc/metadata-internal.h b/cpp/src/arrow/ipc/metadata-internal.h
index 4df8050cddb..6562382b878 100644
--- a/cpp/src/arrow/ipc/metadata-internal.h
+++ b/cpp/src/arrow/ipc/metadata-internal.h
@@ -145,8 +145,7 @@ Status WriteRecordBatchMessage(const int64_t length, const int64_t body_length,
 Status WriteTensorMessage(const Tensor& tensor, const int64_t buffer_start_offset,
                           std::shared_ptr<Buffer>* out);
 
-Status WriteSparseTensorMessage(const SparseTensor& sparse_tensor,
-                                int64_t body_length,
+Status WriteSparseTensorMessage(const SparseTensor& sparse_tensor, int64_t body_length,
                                 const std::vector<BufferMetadata>& buffers,
                                 std::shared_ptr<Buffer>* out);
 
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 1207427806b..e856acafd71 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -773,7 +773,8 @@ Status MakeSparseTensorWithSparseCOOIndex(
     const std::vector<std::string>& dim_names,
     const std::shared_ptr<SparseCOOIndex>& sparse_index, int64_t non_zero_length,
     const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensor>* out) {
-  *out = std::make_shared<SparseTensorImpl<SparseCOOIndex>>(sparse_index, type, data, shape, dim_names);
+  *out = std::make_shared<SparseTensorImpl<SparseCOOIndex>>(sparse_index, type, data,
+                                                            shape, dim_names);
   return Status::OK();
 }
 
@@ -782,7 +783,8 @@ Status MakeSparseTensorWithSparseCSRIndex(
     const std::vector<std::string>& dim_names,
     const std::shared_ptr<SparseCSRIndex>& sparse_index, int64_t non_zero_length,
     const std::shared_ptr<Buffer>& data, std::shared_ptr<SparseTensor>* out) {
-  *out = std::make_shared<SparseTensorImpl<SparseCSRIndex>>(sparse_index, type, data, shape, dim_names);
+  *out = std::make_shared<SparseTensorImpl<SparseCSRIndex>>(sparse_index, type, data,
+                                                            shape, dim_names);
   return Status::OK();
 }
 
diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc
index eab7cacc211..101500d3643 100644
--- a/cpp/src/arrow/sparse_tensor.cc
+++ b/cpp/src/arrow/sparse_tensor.cc
@@ -311,10 +311,10 @@ std::string SparseCSRIndex::ToString() const { return std::string("SparseCSRInde
 
 // Constructor with all attributes
 SparseTensor::SparseTensor(const std::shared_ptr<DataType>& type,
-                                   const std::shared_ptr<Buffer>& data,
-                                   const std::vector<int64_t>& shape,
-                                   const std::shared_ptr<SparseIndex>& sparse_index,
-                                   const std::vector<std::string>& dim_names)
+                           const std::shared_ptr<Buffer>& data,
+                           const std::vector<int64_t>& shape,
+                           const std::shared_ptr<SparseIndex>& sparse_index,
+                           const std::vector<std::string>& dim_names)
     : type_(type),
       data_(data),
       shape_(shape),
@@ -346,16 +346,17 @@ bool SparseTensor::Equals(const SparseTensor& other) const {
 
 // Constructor with a dense tensor
 template <typename SparseIndexType>
-SparseTensorImpl<SparseIndexType>::SparseTensorImpl(const std::shared_ptr<DataType>& type,
-                                                    const std::vector<int64_t>& shape,
-                                                    const std::vector<std::string>& dim_names)
+SparseTensorImpl<SparseIndexType>::SparseTensorImpl(
+    const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+    const std::vector<std::string>& dim_names)
     : SparseTensorImpl(nullptr, type, nullptr, shape, dim_names) {}
 
 // Constructor with a dense tensor
 template <typename SparseIndexType>
 template <typename TYPE>
 SparseTensorImpl<SparseIndexType>::SparseTensorImpl(const NumericTensor<TYPE>& tensor)
-    : SparseTensorImpl(nullptr, tensor.type(), nullptr, tensor.shape(), tensor.dim_names_) {
+    : SparseTensorImpl(nullptr, tensor.type(), nullptr, tensor.shape(),
+                       tensor.dim_names_) {
   SparseTensorConverter<TYPE, SparseIndexType> converter(tensor);
   DCHECK_OK(converter.Convert());
   sparse_index_ = converter.sparse_index;
@@ -365,7 +366,8 @@ SparseTensorImpl<SparseIndexType>::SparseTensorImpl(const NumericTensor<TYPE>& t
 // Constructor with a dense tensor
 template <typename SparseIndexType>
 SparseTensorImpl<SparseIndexType>::SparseTensorImpl(const Tensor& tensor)
-    : SparseTensorImpl(nullptr, tensor.type(), nullptr, tensor.shape(), tensor.dim_names_) {
+    : SparseTensorImpl(nullptr, tensor.type(), nullptr, tensor.shape(),
+                       tensor.dim_names_) {
   switch (tensor.type()->id()) {
     case Type::UINT8:
       MakeSparseTensorFromTensor<UInt8Type, SparseIndexType>(tensor, &sparse_index_,
@@ -419,29 +421,29 @@ SparseTensorImpl<SparseIndexType>::SparseTensorImpl(const Tensor& tensor)
 // ----------------------------------------------------------------------
 // Instantiate templates
 
-#define INSTANTIATE_SPARSE_TENSOR(IndexType)                    \
-  template class ARROW_TEMPLATE_EXPORT SparseTensorImpl<IndexType>; \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
-      const NumericTensor<UInt8Type>&);                         \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
-      const NumericTensor<UInt16Type>&);                        \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
-      const NumericTensor<UInt32Type>&);                        \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
-      const NumericTensor<UInt64Type>&);                        \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
-      const NumericTensor<Int8Type>&);                          \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
-      const NumericTensor<Int16Type>&);                         \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
-      const NumericTensor<Int32Type>&);                         \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
-      const NumericTensor<Int64Type>&);                         \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
-      const NumericTensor<HalfFloatType>&);                     \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
-      const NumericTensor<FloatType>&);                         \
-  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl(  \
+#define INSTANTIATE_SPARSE_TENSOR(IndexType)                           \
+  template class ARROW_TEMPLATE_EXPORT SparseTensorImpl<IndexType>;    \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
+      const NumericTensor<UInt8Type>&);                                \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
+      const NumericTensor<UInt16Type>&);                               \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
+      const NumericTensor<UInt32Type>&);                               \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
+      const NumericTensor<UInt64Type>&);                               \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
+      const NumericTensor<Int8Type>&);                                 \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
+      const NumericTensor<Int16Type>&);                                \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
+      const NumericTensor<Int32Type>&);                                \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
+      const NumericTensor<Int64Type>&);                                \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
+      const NumericTensor<HalfFloatType>&);                            \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
+      const NumericTensor<FloatType>&);                                \
+  template ARROW_EXPORT SparseTensorImpl<IndexType>::SparseTensorImpl( \
       const NumericTensor<DoubleType>&)
 
 INSTANTIATE_SPARSE_TENSOR(SparseCOOIndex);
diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h
index 69505388330..c7693d2ec95 100644
--- a/cpp/src/arrow/sparse_tensor.h
+++ b/cpp/src/arrow/sparse_tensor.h
@@ -121,9 +121,7 @@ class ARROW_EXPORT SparseTensor {
  public:
   virtual ~SparseTensor() = default;
 
-  SparseTensorFormat::type format_id() const {
-    return sparse_index_->format_id();
-  }
+  SparseTensorFormat::type format_id() const { return sparse_index_->format_id(); }
 
   std::shared_ptr<DataType> type() const { return type_; }
   std::shared_ptr<Buffer> data() const { return data_; }
@@ -154,8 +152,8 @@ class ARROW_EXPORT SparseTensor {
 
  protected:
   // Constructor with all attributes
-  SparseTensor(const std::shared_ptr<DataType>& type,
-               const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+  SparseTensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
+               const std::vector<int64_t>& shape,
                const std::shared_ptr<SparseIndex>& sparse_index,
                const std::vector<std::string>& dim_names);
 
@@ -171,7 +169,8 @@ class ARROW_EXPORT SparseTensor {
 // ----------------------------------------------------------------------
 // SparseTensorImpl class
 
-/// \brief EXPERIMENTAL: Concrete sparse tensor implementation classes with sparse index type
+/// \brief EXPERIMENTAL: Concrete sparse tensor implementation classes with sparse index
+/// type
 template <typename SparseIndexType>
 class ARROW_EXPORT SparseTensorImpl : public SparseTensor {
  public:
@@ -179,13 +178,14 @@ class ARROW_EXPORT SparseTensorImpl : public SparseTensor {
 
   // Constructor with all attributes
   SparseTensorImpl(const std::shared_ptr<SparseIndexType>& sparse_index,
-                   const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
-                   const std::vector<int64_t>& shape,
+                   const std::shared_ptr<DataType>& type,
+                   const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
                    const std::vector<std::string>& dim_names)
       : SparseTensor(type, data, shape, sparse_index, dim_names) {}
 
   // Constructor for empty sparse tensor
-  SparseTensorImpl(const std::shared_ptr<DataType>& type, const std::vector<int64_t>& shape,
+  SparseTensorImpl(const std::shared_ptr<DataType>& type,
+                   const std::vector<int64_t>& shape,
                    const std::vector<std::string>& dim_names = {});
 
   // Constructor with a dense numeric tensor