From 2064321abf9a1edabb2ecc5604691099544e4869 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Fri, 3 Jul 2020 23:03:33 +0900 Subject: [PATCH 1/4] Improve the conversion performance from Tensor to SparseCOOTensor This change improves the conversion speed for all the cases of row-major and column-major tensors. For strided tensors, all the cases are improved except for the combination of int16 value and less than 32-bit index. The result from `archery benchmark diff` command is below, the baseline is the commit 8f96d1dc9 (before merging #7539) and the contender is this commit: ``` benchmark baseline contender change % 43 Int16StridedTensorConversionFixture/ConvertToSparseCOOTensorInt32 141564.498765 182313.374077 28.785 10 Int16StridedTensorConversionFixture/ConvertToSparseCOOTensorInt16 140420.265077 153715.618715 9.468 42 Int16StridedTensorConversionFixture/ConvertToSparseCOOTensorInt8 167601.944005 170626.538009 1.805 37 Int16StridedTensorConversionFixture/ConvertToSparseCOOTensorInt64 143722.048451 141928.779114 -1.248 27 Int8StridedTensorConversionFixture/ConvertToSparseCOOTensorInt16 169947.630903 164423.055008 -3.251 24 Int8StridedTensorConversionFixture/ConvertToSparseCOOTensorInt8 170153.324442 163898.373534 -3.676 45 Int8StridedTensorConversionFixture/ConvertToSparseCOOTensorInt32 170883.542468 164131.618700 -3.951 35 Int8StridedTensorConversionFixture/ConvertToSparseCOOTensorInt64 171015.028153 163516.191034 -4.385 9 DoubleStridedTensorConversionFixture/ConvertToSparseCOOTensorInt8 200974.675587 191956.688874 -4.487 18 FloatStridedTensorConversionFixture/ConvertToSparseCOOTensorInt8 192320.819787 182941.130595 -4.877 12 DoubleStridedTensorConversionFixture/ConvertToSparseCOOTensorInt64 175198.892973 166417.452194 -5.012 30 FloatStridedTensorConversionFixture/ConvertToSparseCOOTensorInt32 167174.764713 151431.022906 -9.418 29 DoubleStridedTensorConversionFixture/ConvertToSparseCOOTensorInt16 173925.990981 157142.110096 -9.650 16 FloatStridedTensorConversionFixture/ConvertToSparseCOOTensorInt16 167877.497573 151666.610814 -9.656 26 FloatStridedTensorConversionFixture/ConvertToSparseCOOTensorInt64 169705.312801 151885.952803 -10.500 6 DoubleStridedTensorConversionFixture/ConvertToSparseCOOTensorInt32 177394.661870 156019.301906 -12.050 5 Int16RowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt16 107592.839089 66069.770737 -38.593 41 Int16ColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt16 114841.700196 68707.073774 -40.172 47 Int16RowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt32 107304.436017 63922.898636 -40.428 4 FloatRowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt16 112315.965200 66577.854744 -40.723 21 Int16ColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt32 115090.317912 66527.852021 -42.195 17 FloatColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt16 121583.540341 70025.614174 -42.405 3 DoubleRowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt16 116946.572632 66411.338694 -43.212 15 FloatRowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt32 112275.805149 63264.226406 -43.653 13 FloatColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt32 122085.596559 66569.027159 -45.473 34 Int16RowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt64 109888.801628 58860.826009 -46.436 20 Int16ColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt64 117648.480324 62574.709433 -46.812 19 Int8ColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt8 137444.576787 71969.132261 
-47.638 28 DoubleRowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt64 119527.435615 61405.371141 -48.627 40 FloatRowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt64 115130.821188 58664.779831 -49.045 39 Int8ColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt16 137053.503574 69755.112894 -49.104 22 Int8RowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt8 136645.576795 69303.266896 -49.282 23 FloatColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt64 124100.575779 61723.051518 -50.264 31 DoubleColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt16 140278.467902 69584.530347 -50.395 1 Int16RowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt8 135770.669563 67151.922438 -50.540 44 Int16ColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt8 142625.928542 70315.759868 -50.699 2 Int8ColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt32 137443.030096 67752.813535 -50.705 46 Int8RowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt16 135961.160225 66613.351871 -51.006 11 DoubleColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt32 138857.793332 67315.714410 -51.522 8 FloatRowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt8 138992.703542 66847.061004 -51.906 7 Int8RowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt32 136298.424804 64520.497064 -52.662 36 FloatColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt8 149706.883716 69805.958679 -53.372 33 DoubleRowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt8 143460.582904 66870.585026 -53.387 38 DoubleColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt64 138220.367601 64425.776453 -53.389 14 DoubleRowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt32 136707.421042 63624.050357 -53.460 25 Int8ColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt64 137303.219403 62528.740787 -54.459 32 Int8RowMajorTensorConversionFixture/ConvertToSparseCOOTensorInt64 136551.052565 58743.141699 -56.981 0 DoubleColumnMajorTensorConversionFixture/ConvertToSparseCOOTensorInt8 162895.437265 69676.279783 -57.226 ``` --- cpp/src/arrow/tensor/coo_converter.cc | 206 +++++++++++++++++++++++--- cpp/src/arrow/tensor/csx_converter.cc | 2 +- 2 files changed, 186 insertions(+), 22 deletions(-) diff --git a/cpp/src/arrow/tensor/coo_converter.cc b/cpp/src/arrow/tensor/coo_converter.cc index 2389ffc6e5f..7f4b4795469 100644 --- a/cpp/src/arrow/tensor/coo_converter.cc +++ b/cpp/src/arrow/tensor/coo_converter.cc @@ -17,8 +17,10 @@ #include "arrow/tensor/converter.h" +#include #include #include +#include #include #include "arrow/buffer.h" @@ -34,8 +36,9 @@ class MemoryPool; namespace internal { namespace { -inline void IncrementIndex(std::vector& coord, - const std::vector& shape) { +template +inline void IncrementRowMajorIndex(std::vector& coord, + const std::vector& shape) { const int64_t ndim = shape.size(); ++coord[ndim - 1]; if (coord[ndim - 1] == shape[ndim - 1]) { @@ -48,6 +51,178 @@ inline void IncrementIndex(std::vector& coord, } } +template +void ConvertRowMajorTensor(const Tensor& tensor, c_index_type* indices, + c_value_type* values, const int64_t size) { + const auto ndim = tensor.ndim(); + const auto& shape = tensor.shape(); + const c_value_type* tensor_data = + reinterpret_cast(tensor.raw_data()); + + constexpr c_value_type zero = 0; + std::vector coord(ndim, 0); + for (int64_t n = tensor.size(); n > 0; --n) { + const c_value_type x = *tensor_data; + if (ARROW_PREDICT_FALSE(x != zero)) { + std::copy(coord.begin(), 
coord.end(), indices); + *values++ = x; + indices += ndim; + } + + IncrementRowMajorIndex(coord, shape); + ++tensor_data; + } +} + +template +void ConvertColumnMajorTensor(const Tensor& tensor, c_index_type* out_indices, + c_value_type* out_values, const int64_t size) { + const auto ndim = tensor.ndim(); + std::vector indices(ndim * size); + std::vector values(size); + ConvertRowMajorTensor(tensor, indices.data(), values.data(), size); + + // transpose indices + for (int64_t i = 0; i < size; ++i) { + for (int j = 0; j < ndim / 2; ++j) { + std::swap(indices[i * ndim + j], indices[i * ndim + ndim - j - 1]); + } + } + + // sort indices + std::vector order(size); + std::iota(order.begin(), order.end(), 0); + std::sort(order.begin(), order.end(), [&](const int64_t xi, const int64_t yi) { + const int64_t x_offset = xi * ndim; + const int64_t y_offset = yi * ndim; + for (int j = 0; j < ndim; ++j) { + const auto x = indices[x_offset + j]; + const auto y = indices[y_offset + j]; + if (x < y) return true; + if (x > y) return false; + } + return false; + }); + + // transfer result + const auto* indices_data = indices.data(); + for (int64_t i = 0; i < size; ++i) { + out_values[i] = values[i]; + + std::copy_n(indices_data, ndim, out_indices); + indices_data += ndim; + out_indices += ndim; + } +} + +template +void ConvertStridedTensor(const Tensor& tensor, c_index_type* indices, + c_value_type* values, const int64_t size) { + using ValueType = typename CTypeTraits::ArrowType; + const auto& shape = tensor.shape(); + const auto ndim = tensor.ndim(); + std::vector coord(ndim, 0); + + constexpr c_value_type zero = 0; + c_value_type x; + int64_t i; + for (int64_t n = tensor.size(); n > 0; --n) { + x = tensor.Value(coord); + if (ARROW_PREDICT_FALSE(x != zero)) { + *values++ = x; + for (i = 0; i < ndim; ++i) { + *indices++ = static_cast(coord[i]); + } + } + + IncrementRowMajorIndex(coord, shape); + } +} + +#define CONVERT_ROW_MAJOR_TENSOR(index_type, indices, value_type, values, size) \ + ConvertRowMajorTensor( \ + tensor_, reinterpret_cast(indices), \ + reinterpret_cast(values), size) + +#define CONVERT_COLUMN_MAJOR_TENSOR(index_type, indices, value_type, values, size) \ + ConvertColumnMajorTensor( \ + tensor_, reinterpret_cast(indices), \ + reinterpret_cast(values), size) + +#define CONVERT_STRIDED_TENSOR(index_type, indices, value_type, values, size) \ + ConvertStridedTensor( \ + tensor_, reinterpret_cast(indices), \ + reinterpret_cast(values), size) + +#define DISPATCH_CONVERT_TENSOR_INLINE(kind, indices, index_elsize, values, \ + value_elsize, size) \ + switch (index_elsize) { \ + case 1: \ + switch (value_elsize) { \ + case 1: \ + CONVERT_##kind##_TENSOR(uint8_t, indices, uint8_t, values, size); \ + break; \ + case 2: \ + CONVERT_##kind##_TENSOR(uint8_t, indices, uint16_t, values, size); \ + break; \ + case 4: \ + CONVERT_##kind##_TENSOR(uint8_t, indices, uint32_t, values, size); \ + break; \ + case 8: \ + CONVERT_##kind##_TENSOR(uint8_t, indices, int64_t, values, size); \ + break; \ + } \ + break; \ + case 2: \ + switch (value_elsize) { \ + case 1: \ + CONVERT_##kind##_TENSOR(uint16_t, indices, uint8_t, values, size); \ + break; \ + case 2: \ + CONVERT_##kind##_TENSOR(uint16_t, indices, uint16_t, values, size); \ + break; \ + case 4: \ + CONVERT_##kind##_TENSOR(uint16_t, indices, uint32_t, values, size); \ + break; \ + case 8: \ + CONVERT_##kind##_TENSOR(uint16_t, indices, int64_t, values, size); \ + break; \ + } \ + break; \ + case 4: \ + switch (value_elsize) { \ + case 1: \ + 
CONVERT_##kind##_TENSOR(uint32_t, indices, uint8_t, values, size); \ + break; \ + case 2: \ + CONVERT_##kind##_TENSOR(uint32_t, indices, uint16_t, values, size); \ + break; \ + case 4: \ + CONVERT_##kind##_TENSOR(uint32_t, indices, uint32_t, values, size); \ + break; \ + case 8: \ + CONVERT_##kind##_TENSOR(uint32_t, indices, int64_t, values, size); \ + break; \ + } \ + break; \ + case 8: \ + switch (value_elsize) { \ + case 1: \ + CONVERT_##kind##_TENSOR(int64_t, indices, uint8_t, values, size); \ + break; \ + case 2: \ + CONVERT_##kind##_TENSOR(int64_t, indices, uint16_t, values, size); \ + break; \ + case 4: \ + CONVERT_##kind##_TENSOR(int64_t, indices, uint32_t, values, size); \ + break; \ + case 8: \ + CONVERT_##kind##_TENSOR(int64_t, indices, int64_t, values, size); \ + break; \ + } \ + break; \ + } + // ---------------------------------------------------------------------- // SparseTensorConverter for SparseCOOIndex @@ -92,26 +267,15 @@ class SparseCOOTensorConverter : private SparseTensorConverterMixin { } tensor_data += value_elsize; } + } else if (tensor_.is_row_major()) { + DISPATCH_CONVERT_TENSOR_INLINE(ROW_MAJOR, indices, index_elsize, values, + value_elsize, nonzero_count); + } else if (tensor_.is_column_major()) { + DISPATCH_CONVERT_TENSOR_INLINE(COLUMN_MAJOR, indices, index_elsize, values, + value_elsize, nonzero_count); } else { - const std::vector& shape = tensor_.shape(); - std::vector coord(ndim, 0); // The current logical coordinates - - for (int64_t n = tensor_.size(); n > 0; n--) { - int64_t offset = tensor_.CalculateValueOffset(coord); - if (std::any_of(tensor_data + offset, tensor_data + offset + value_elsize, - IsNonZero)) { - std::copy_n(tensor_data + offset, value_elsize, values); - values += value_elsize; - - // Write indices in row-major order. - for (int64_t i = 0; i < ndim; ++i) { - AssignIndex(indices, coord[i], index_elsize); - indices += index_elsize; - } - } - - IncrementIndex(coord, shape); - } + DISPATCH_CONVERT_TENSOR_INLINE(STRIDED, indices, index_elsize, values, value_elsize, + nonzero_count); } // make results diff --git a/cpp/src/arrow/tensor/csx_converter.cc b/cpp/src/arrow/tensor/csx_converter.cc index 90ca6eb9b85..5ce99d4c3e6 100644 --- a/cpp/src/arrow/tensor/csx_converter.cc +++ b/cpp/src/arrow/tensor/csx_converter.cc @@ -181,7 +181,7 @@ Result> MakeTensorFromSparseCSXMatrix( const auto nc = shape[1]; - int64_t offset; + int64_t offset = 0; for (int64_t i = 0; i < indptr->size() - 1; ++i) { const auto start = SparseTensorConverterMixin::GetIndexValue(indptr_data, indptr_elsize); From 1609bf749e62dbfbece3d29c8a75ca192258231d Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Wed, 8 Jul 2020 10:47:35 +0900 Subject: [PATCH 2/4] Rewrite DISPATCH macro --- cpp/src/arrow/tensor/converter_internal.h | 89 +++++++++++++++++++++++ cpp/src/arrow/tensor/coo_converter.cc | 87 +++------------------- 2 files changed, 98 insertions(+), 78 deletions(-) create mode 100644 cpp/src/arrow/tensor/converter_internal.h diff --git a/cpp/src/arrow/tensor/converter_internal.h b/cpp/src/arrow/tensor/converter_internal.h new file mode 100644 index 00000000000..edd4344108e --- /dev/null +++ b/cpp/src/arrow/tensor/converter_internal.h @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/tensor/converter.h" + +#define DISPATCH(ACTION, index_elsize, value_elsize, ...) \ + switch (index_elsize) { \ + case 1: \ + switch (value_elsize) { \ + case 1: \ + ACTION(uint8_t, uint8_t, __VA_ARGS__); \ + break; \ + case 2: \ + ACTION(uint8_t, uint16_t, __VA_ARGS__); \ + break; \ + case 4: \ + ACTION(uint8_t, uint32_t, __VA_ARGS__); \ + break; \ + case 8: \ + ACTION(uint8_t, uint64_t, __VA_ARGS__); \ + break; \ + } \ + break; \ + case 2: \ + switch (value_elsize) { \ + case 1: \ + ACTION(uint16_t, uint8_t, __VA_ARGS__); \ + break; \ + case 2: \ + ACTION(uint16_t, uint16_t, __VA_ARGS__); \ + break; \ + case 4: \ + ACTION(uint16_t, uint32_t, __VA_ARGS__); \ + break; \ + case 8: \ + ACTION(uint16_t, uint64_t, __VA_ARGS__); \ + break; \ + } \ + break; \ + case 4: \ + switch (value_elsize) { \ + case 1: \ + ACTION(uint32_t, uint8_t, __VA_ARGS__); \ + break; \ + case 2: \ + ACTION(uint32_t, uint16_t, __VA_ARGS__); \ + break; \ + case 4: \ + ACTION(uint32_t, uint32_t, __VA_ARGS__); \ + break; \ + case 8: \ + ACTION(uint32_t, uint64_t, __VA_ARGS__); \ + break; \ + } \ + break; \ + case 8: \ + switch (value_elsize) { \ + case 1: \ + ACTION(int64_t, uint8_t, __VA_ARGS__); \ + break; \ + case 2: \ + ACTION(int64_t, uint16_t, __VA_ARGS__); \ + break; \ + case 4: \ + ACTION(int64_t, uint32_t, __VA_ARGS__); \ + break; \ + case 8: \ + ACTION(int64_t, uint64_t, __VA_ARGS__); \ + break; \ + } \ + break; \ + } + diff --git a/cpp/src/arrow/tensor/coo_converter.cc b/cpp/src/arrow/tensor/coo_converter.cc index 7f4b4795469..32f7c6bd526 100644 --- a/cpp/src/arrow/tensor/coo_converter.cc +++ b/cpp/src/arrow/tensor/coo_converter.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-#include "arrow/tensor/converter.h" +#include "arrow/tensor/converter_internal.h" #include #include @@ -139,89 +139,23 @@ void ConvertStridedTensor(const Tensor& tensor, c_index_type* indices, } } -#define CONVERT_ROW_MAJOR_TENSOR(index_type, indices, value_type, values, size) \ +#define CONVERT_ROW_MAJOR_TENSOR(index_type, value_type, indices, values, size) \ ConvertRowMajorTensor( \ tensor_, reinterpret_cast(indices), \ reinterpret_cast(values), size) -#define CONVERT_COLUMN_MAJOR_TENSOR(index_type, indices, value_type, values, size) \ +#define CONVERT_COLUMN_MAJOR_TENSOR(index_type, value_type, indices, values, size) \ ConvertColumnMajorTensor( \ tensor_, reinterpret_cast(indices), \ reinterpret_cast(values), size) -#define CONVERT_STRIDED_TENSOR(index_type, indices, value_type, values, size) \ +#define CONVERT_STRIDED_TENSOR(index_type, value_type, indices, values, size) \ ConvertStridedTensor( \ tensor_, reinterpret_cast(indices), \ reinterpret_cast(values), size) -#define DISPATCH_CONVERT_TENSOR_INLINE(kind, indices, index_elsize, values, \ - value_elsize, size) \ - switch (index_elsize) { \ - case 1: \ - switch (value_elsize) { \ - case 1: \ - CONVERT_##kind##_TENSOR(uint8_t, indices, uint8_t, values, size); \ - break; \ - case 2: \ - CONVERT_##kind##_TENSOR(uint8_t, indices, uint16_t, values, size); \ - break; \ - case 4: \ - CONVERT_##kind##_TENSOR(uint8_t, indices, uint32_t, values, size); \ - break; \ - case 8: \ - CONVERT_##kind##_TENSOR(uint8_t, indices, int64_t, values, size); \ - break; \ - } \ - break; \ - case 2: \ - switch (value_elsize) { \ - case 1: \ - CONVERT_##kind##_TENSOR(uint16_t, indices, uint8_t, values, size); \ - break; \ - case 2: \ - CONVERT_##kind##_TENSOR(uint16_t, indices, uint16_t, values, size); \ - break; \ - case 4: \ - CONVERT_##kind##_TENSOR(uint16_t, indices, uint32_t, values, size); \ - break; \ - case 8: \ - CONVERT_##kind##_TENSOR(uint16_t, indices, int64_t, values, size); \ - break; \ - } \ - break; \ - case 4: \ - switch (value_elsize) { \ - case 1: \ - CONVERT_##kind##_TENSOR(uint32_t, indices, uint8_t, values, size); \ - break; \ - case 2: \ - CONVERT_##kind##_TENSOR(uint32_t, indices, uint16_t, values, size); \ - break; \ - case 4: \ - CONVERT_##kind##_TENSOR(uint32_t, indices, uint32_t, values, size); \ - break; \ - case 8: \ - CONVERT_##kind##_TENSOR(uint32_t, indices, int64_t, values, size); \ - break; \ - } \ - break; \ - case 8: \ - switch (value_elsize) { \ - case 1: \ - CONVERT_##kind##_TENSOR(int64_t, indices, uint8_t, values, size); \ - break; \ - case 2: \ - CONVERT_##kind##_TENSOR(int64_t, indices, uint16_t, values, size); \ - break; \ - case 4: \ - CONVERT_##kind##_TENSOR(int64_t, indices, uint32_t, values, size); \ - break; \ - case 8: \ - CONVERT_##kind##_TENSOR(int64_t, indices, int64_t, values, size); \ - break; \ - } \ - break; \ - } +#define CONVERT_TENSOR(index_type, value_type, KIND, ...) 
\ + CONVERT_##KIND##_TENSOR(index_type, value_type, __VA_ARGS__) // ---------------------------------------------------------------------- // SparseTensorConverter for SparseCOOIndex @@ -268,14 +202,11 @@ class SparseCOOTensorConverter : private SparseTensorConverterMixin { tensor_data += value_elsize; } } else if (tensor_.is_row_major()) { - DISPATCH_CONVERT_TENSOR_INLINE(ROW_MAJOR, indices, index_elsize, values, - value_elsize, nonzero_count); + DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, ROW_MAJOR, indices, values, nonzero_count); } else if (tensor_.is_column_major()) { - DISPATCH_CONVERT_TENSOR_INLINE(COLUMN_MAJOR, indices, index_elsize, values, - value_elsize, nonzero_count); + DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, COLUMN_MAJOR, indices, values, nonzero_count); } else { - DISPATCH_CONVERT_TENSOR_INLINE(STRIDED, indices, index_elsize, values, value_elsize, - nonzero_count); + DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, STRIDED, indices, values, nonzero_count); } // make results From 80c497335624d45e6bdced504e0778ecefd08b7f Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Fri, 10 Jul 2020 09:09:09 +0900 Subject: [PATCH 3/4] Fix format --- cpp/src/arrow/tensor/converter_internal.h | 131 +++++++++++----------- cpp/src/arrow/tensor/coo_converter.cc | 9 +- 2 files changed, 71 insertions(+), 69 deletions(-) diff --git a/cpp/src/arrow/tensor/converter_internal.h b/cpp/src/arrow/tensor/converter_internal.h index edd4344108e..3a87feaf4b3 100644 --- a/cpp/src/arrow/tensor/converter_internal.h +++ b/cpp/src/arrow/tensor/converter_internal.h @@ -20,70 +20,69 @@ #include "arrow/tensor/converter.h" #define DISPATCH(ACTION, index_elsize, value_elsize, ...) \ - switch (index_elsize) { \ - case 1: \ - switch (value_elsize) { \ - case 1: \ - ACTION(uint8_t, uint8_t, __VA_ARGS__); \ - break; \ - case 2: \ - ACTION(uint8_t, uint16_t, __VA_ARGS__); \ - break; \ - case 4: \ - ACTION(uint8_t, uint32_t, __VA_ARGS__); \ - break; \ - case 8: \ - ACTION(uint8_t, uint64_t, __VA_ARGS__); \ - break; \ - } \ - break; \ - case 2: \ - switch (value_elsize) { \ - case 1: \ - ACTION(uint16_t, uint8_t, __VA_ARGS__); \ - break; \ - case 2: \ - ACTION(uint16_t, uint16_t, __VA_ARGS__); \ - break; \ - case 4: \ - ACTION(uint16_t, uint32_t, __VA_ARGS__); \ - break; \ - case 8: \ - ACTION(uint16_t, uint64_t, __VA_ARGS__); \ - break; \ - } \ - break; \ - case 4: \ - switch (value_elsize) { \ - case 1: \ - ACTION(uint32_t, uint8_t, __VA_ARGS__); \ - break; \ - case 2: \ - ACTION(uint32_t, uint16_t, __VA_ARGS__); \ - break; \ - case 4: \ - ACTION(uint32_t, uint32_t, __VA_ARGS__); \ - break; \ - case 8: \ - ACTION(uint32_t, uint64_t, __VA_ARGS__); \ - break; \ - } \ - break; \ - case 8: \ - switch (value_elsize) { \ - case 1: \ - ACTION(int64_t, uint8_t, __VA_ARGS__); \ - break; \ - case 2: \ - ACTION(int64_t, uint16_t, __VA_ARGS__); \ - break; \ - case 4: \ - ACTION(int64_t, uint32_t, __VA_ARGS__); \ - break; \ - case 8: \ - ACTION(int64_t, uint64_t, __VA_ARGS__); \ - break; \ - } \ - break; \ + switch (index_elsize) { \ + case 1: \ + switch (value_elsize) { \ + case 1: \ + ACTION(uint8_t, uint8_t, __VA_ARGS__); \ + break; \ + case 2: \ + ACTION(uint8_t, uint16_t, __VA_ARGS__); \ + break; \ + case 4: \ + ACTION(uint8_t, uint32_t, __VA_ARGS__); \ + break; \ + case 8: \ + ACTION(uint8_t, uint64_t, __VA_ARGS__); \ + break; \ + } \ + break; \ + case 2: \ + switch (value_elsize) { \ + case 1: \ + ACTION(uint16_t, uint8_t, __VA_ARGS__); \ + break; \ + case 2: \ + ACTION(uint16_t, uint16_t, 
__VA_ARGS__); \ + break; \ + case 4: \ + ACTION(uint16_t, uint32_t, __VA_ARGS__); \ + break; \ + case 8: \ + ACTION(uint16_t, uint64_t, __VA_ARGS__); \ + break; \ + } \ + break; \ + case 4: \ + switch (value_elsize) { \ + case 1: \ + ACTION(uint32_t, uint8_t, __VA_ARGS__); \ + break; \ + case 2: \ + ACTION(uint32_t, uint16_t, __VA_ARGS__); \ + break; \ + case 4: \ + ACTION(uint32_t, uint32_t, __VA_ARGS__); \ + break; \ + case 8: \ + ACTION(uint32_t, uint64_t, __VA_ARGS__); \ + break; \ + } \ + break; \ + case 8: \ + switch (value_elsize) { \ + case 1: \ + ACTION(int64_t, uint8_t, __VA_ARGS__); \ + break; \ + case 2: \ + ACTION(int64_t, uint16_t, __VA_ARGS__); \ + break; \ + case 4: \ + ACTION(int64_t, uint32_t, __VA_ARGS__); \ + break; \ + case 8: \ + ACTION(int64_t, uint64_t, __VA_ARGS__); \ + break; \ + } \ + break; \ } - diff --git a/cpp/src/arrow/tensor/coo_converter.cc b/cpp/src/arrow/tensor/coo_converter.cc index 32f7c6bd526..ebdb50ba89e 100644 --- a/cpp/src/arrow/tensor/coo_converter.cc +++ b/cpp/src/arrow/tensor/coo_converter.cc @@ -202,11 +202,14 @@ class SparseCOOTensorConverter : private SparseTensorConverterMixin { tensor_data += value_elsize; } } else if (tensor_.is_row_major()) { - DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, ROW_MAJOR, indices, values, nonzero_count); + DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, ROW_MAJOR, indices, values, + nonzero_count); } else if (tensor_.is_column_major()) { - DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, COLUMN_MAJOR, indices, values, nonzero_count); + DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, COLUMN_MAJOR, indices, values, + nonzero_count); } else { - DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, STRIDED, indices, values, nonzero_count); + DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, STRIDED, indices, values, + nonzero_count); } // make results From aa836ad335f48f7aedab92d972a0d10944485845 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Fri, 10 Jul 2020 09:31:24 +0900 Subject: [PATCH 4/4] Fix for VC++ --- cpp/src/arrow/tensor/coo_converter.cc | 32 +++++++++++++-------------- cpp/src/arrow/util/macros.h | 1 + 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/tensor/coo_converter.cc b/cpp/src/arrow/tensor/coo_converter.cc index ebdb50ba89e..fc76488b795 100644 --- a/cpp/src/arrow/tensor/coo_converter.cc +++ b/cpp/src/arrow/tensor/coo_converter.cc @@ -27,6 +27,7 @@ #include "arrow/status.h" #include "arrow/type.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/macros.h" #include "arrow/visitor_inline.h" namespace arrow { @@ -139,23 +140,20 @@ void ConvertStridedTensor(const Tensor& tensor, c_index_type* indices, } } -#define CONVERT_ROW_MAJOR_TENSOR(index_type, value_type, indices, values, size) \ - ConvertRowMajorTensor( \ - tensor_, reinterpret_cast(indices), \ - reinterpret_cast(values), size) +#define CONVERT_TENSOR(func, index_type, value_type, indices, values, size) \ + func(tensor_, reinterpret_cast(indices), \ + reinterpret_cast(values), size) -#define CONVERT_COLUMN_MAJOR_TENSOR(index_type, value_type, indices, values, size) \ - ConvertColumnMajorTensor( \ - tensor_, reinterpret_cast(indices), \ - reinterpret_cast(values), size) +// Using ARROW_EXPAND is necessary to expand __VA_ARGS__ correctly on VC++. +#define CONVERT_ROW_MAJOR_TENSOR(index_type, value_type, ...) 
\ + ARROW_EXPAND(CONVERT_TENSOR(ConvertRowMajorTensor, index_type, value_type, __VA_ARGS__)) -#define CONVERT_STRIDED_TENSOR(index_type, value_type, indices, values, size) \ - ConvertStridedTensor( \ - tensor_, reinterpret_cast(indices), \ - reinterpret_cast(values), size) +#define CONVERT_COLUMN_MAJOR_TENSOR(index_type, value_type, ...) \ + ARROW_EXPAND( \ + CONVERT_TENSOR(ConvertColumnMajorTensor, index_type, value_type, __VA_ARGS__)) -#define CONVERT_TENSOR(index_type, value_type, KIND, ...) \ - CONVERT_##KIND##_TENSOR(index_type, value_type, __VA_ARGS__) +#define CONVERT_STRIDED_TENSOR(index_type, value_type, ...) \ + ARROW_EXPAND(CONVERT_TENSOR(ConvertStridedTensor, index_type, value_type, __VA_ARGS__)) // ---------------------------------------------------------------------- // SparseTensorConverter for SparseCOOIndex @@ -202,13 +200,13 @@ class SparseCOOTensorConverter : private SparseTensorConverterMixin { tensor_data += value_elsize; } } else if (tensor_.is_row_major()) { - DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, ROW_MAJOR, indices, values, + DISPATCH(CONVERT_ROW_MAJOR_TENSOR, index_elsize, value_elsize, indices, values, nonzero_count); } else if (tensor_.is_column_major()) { - DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, COLUMN_MAJOR, indices, values, + DISPATCH(CONVERT_COLUMN_MAJOR_TENSOR, index_elsize, value_elsize, indices, values, nonzero_count); } else { - DISPATCH(CONVERT_TENSOR, index_elsize, value_elsize, STRIDED, indices, values, + DISPATCH(CONVERT_STRIDED_TENSOR, index_elsize, value_elsize, indices, values, nonzero_count); } diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index 2e4379880a4..548cc041ec8 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -19,6 +19,7 @@ #include +#define ARROW_EXPAND(x) x #define ARROW_STRINGIFY(x) #x #define ARROW_CONCAT(x, y) x##y
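
Editorial note on the VC++ fix above: the `ARROW_EXPAND` workaround addresses a long-standing quirk of MSVC's traditional preprocessor, where `__VA_ARGS__` forwarded into another function-like macro can be handed over as a single preprocessing token instead of being re-split into separate arguments. The standalone sketch below (not Arrow code; `EXPAND`, `DISPATCH_IMPL`, and `DISPATCH` are illustrative names) shows the idiom in isolation. The exact failure mode without the extra expansion varies with the compiler version and whether MSVC's conformant preprocessor (`/Zc:preprocessor`) is enabled.

```
// Standalone illustration of the extra-expansion idiom behind ARROW_EXPAND.
// The macro names here are illustrative, not Arrow APIs.
#include <cstdio>

// Forces one more rescan of its argument, so a forwarded __VA_ARGS__ is
// re-split into individual macro arguments before the inner macro expands.
#define EXPAND(x) x

#define DISPATCH_IMPL(func, a, b) func(a, b)

// Without EXPAND, MSVC's traditional preprocessor may pass __VA_ARGS__ to
// DISPATCH_IMPL as one argument, so `a` receives "1, 2" and `b` is missing.
#define DISPATCH(func, ...) EXPAND(DISPATCH_IMPL(func, __VA_ARGS__))

int add(int a, int b) { return a + b; }

int main() {
  std::printf("%d\n", DISPATCH(add, 1, 2));  // prints 3
  return 0;
}
```

This is why each `CONVERT_*_TENSOR` macro in the final patch wraps its expansion in `ARROW_EXPAND`: the arguments that `DISPATCH` forwards via `__VA_ARGS__` are re-split before `CONVERT_TENSOR` consumes them.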
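
For reference, the core idea behind the row-major speedup in the first patch — one linear pass over the dense buffer with a logical coordinate incremented in lockstep, instead of a per-element strided offset calculation and byte-wise copies — can be summarized outside of Arrow as follows. This is a simplified sketch under that reading of the diff; the function and type names are illustrative and do not mirror Arrow's internal signatures.

```
// Minimal sketch of a single-pass row-major dense-to-COO conversion.
// Not Arrow's implementation; names and signatures are illustrative only.
#include <cstdint>
#include <iostream>
#include <vector>

// Advance `coord` to the next position in row-major (last-index-fastest) order.
void IncrementRowMajorIndex(std::vector<int64_t>& coord,
                            const std::vector<int64_t>& shape) {
  const int64_t ndim = static_cast<int64_t>(shape.size());
  ++coord[ndim - 1];
  for (int64_t d = ndim - 1; d > 0 && coord[d] == shape[d]; --d) {
    coord[d] = 0;
    ++coord[d - 1];
  }
}

// Append one row of `ndim` coordinates to `indices` and one entry to `values`
// for every non-zero element of a dense row-major buffer.
template <typename ValueType, typename IndexType>
void DenseToCOO(const std::vector<ValueType>& data,
                const std::vector<int64_t>& shape,
                std::vector<IndexType>* indices, std::vector<ValueType>* values) {
  std::vector<int64_t> coord(shape.size(), 0);
  for (const ValueType x : data) {
    if (x != ValueType{0}) {
      for (const int64_t c : coord) indices->push_back(static_cast<IndexType>(c));
      values->push_back(x);
    }
    IncrementRowMajorIndex(coord, shape);
  }
}

int main() {
  // 2x3 row-major tensor {{0, 5, 0}, {7, 0, 9}}.
  const std::vector<double> data = {0, 5, 0, 7, 0, 9};
  std::vector<int32_t> indices;
  std::vector<double> values;
  DenseToCOO(data, {2, 3}, &indices, &values);
  // Expected output: (0,1) = 5, (1,0) = 7, (1,2) = 9.
  for (size_t i = 0; i < values.size(); ++i) {
    std::cout << "(" << indices[2 * i] << "," << indices[2 * i + 1]
              << ") = " << values[i] << "\n";
  }
  return 0;
}
```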