From 16464615e3469d6149684fbd722bed1deec550c9 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Fri, 12 Oct 2018 17:41:21 +0900
Subject: [PATCH 1/3] Introduce NumericTensor class

This commit defines the new NumericTensor<T> class template as a
subclass of the Tensor class. NumericTensor<T> extends Tensor by adding
a member function, Value(), that accesses element values by index.
---
 cpp/src/arrow/tensor-test.cc | 52 ++++++++++++++++++++++++++++++++++
 cpp/src/arrow/tensor.cc      | 55 ++++++++++++++++++++++++++++++++++++
 cpp/src/arrow/tensor.h       | 30 +++++++++++++++++++-
 3 files changed, 136 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/tensor-test.cc b/cpp/src/arrow/tensor-test.cc
index ee8205136f8..043ebe047a8 100644
--- a/cpp/src/arrow/tensor-test.cc
+++ b/cpp/src/arrow/tensor-test.cc
@@ -104,4 +104,56 @@ TEST(TestTensor, ZeroDimensionalTensor) {
   ASSERT_EQ(t.strides().size(), 1);
 }
 
+TEST(TestNumericTensor, ElementAccess) {
+  std::vector<int64_t> shape = {3, 4};
+  // No strides are passed below, so the 12 values are read as a row-major 3x4 layout.
+  std::vector<int64_t> values_i64 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
+  std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
+  NumericTensor<Int64Type> t_i64(buffer_i64, shape);
+
+  ASSERT_EQ(1, t_i64.Value({0, 0}));
+  ASSERT_EQ(5, t_i64.Value({1, 0}));  // first element of the second row
+  ASSERT_EQ(6, t_i64.Value({1, 1}));
+  ASSERT_EQ(11, t_i64.Value({2, 2}));
+  // Same checks with float elements; expected literals equal the stored literals.
+  std::vector<float> values_f32 = {1.1f, 2.1f, 3.1f, 4.1f,  5.1f,  6.1f,
+                                   7.1f, 8.1f, 9.1f, 10.1f, 11.1f, 12.1f};
+  std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
+  NumericTensor<FloatType> t_f32(buffer_f32, shape);
+
+  ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
+  ASSERT_EQ(5.1f, t_f32.Value({1, 0}));
+  ASSERT_EQ(6.1f, t_f32.Value({1, 1}));
+  ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
+}
+
+TEST(TestNumericTensor, ElementAccessWithStrides) {
+  std::vector<int64_t> shape = {3, 4};
+
+  const int64_t i64_size = sizeof(int64_t);
+  std::vector<int64_t> values_i64 = {1, 2, 3, 4, 0,  0,  5,  6, 7,
+                                     8, 0, 0, 9, 10, 11, 12, 0, 0};
+  std::vector<int64_t> strides_i64 = {i64_size * 6, i64_size};  // bytes; 6 slots per row
+  std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
+  NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
+
+  ASSERT_EQ(1, t_i64.Value({0, 0}));
+  ASSERT_EQ(5, t_i64.Value({1, 0}));
+  ASSERT_EQ(6, t_i64.Value({1, 1}));
+  ASSERT_EQ(11, t_i64.Value({2, 2}));
+  // Same padded layout (4 values followed by 2 zeros per row) with float elements.
+  const int64_t f32_size = sizeof(float);
+  std::vector<float> values_f32 = {1.1f, 2.1f,  3.1f,  4.1f,  0.0f, 0.0f,
+                                   5.1f, 6.1f,  7.1f,  8.1f,  0.0f, 0.0f,
+                                   9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
+  std::vector<int64_t> strides_f32 = {f32_size * 6, f32_size};  // bytes; 6 slots per row
+  std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
+  NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
+
+  ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
+  ASSERT_EQ(5.1f, t_f32.Value({1, 0}));
+  ASSERT_EQ(6.1f, t_f32.Value({1, 1}));
+  ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
+}
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index 5b44a031bee..e218a525187 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -26,6 +26,7 @@
 
 #include "arrow/compare.h"
 #include "arrow/type.h"
+#include "arrow/type_traits.h"
 #include "arrow/util/checked_cast.h"
 #include "arrow/util/logging.h"
 
@@ -121,4 +122,58 @@ Type::type Tensor::type_id() const { return type_->id(); }
 
 bool Tensor::Equals(const Tensor& other) const { return TensorEquals(*this, other); }
 
+// ----------------------------------------------------------------------
+// NumericTensor
+
+template <typename TYPE>
+NumericTensor<TYPE>::NumericTensor(const std::shared_ptr<Buffer>& data,
+                                   const std::vector<int64_t>& shape)
+    : NumericTensor(data, shape, {}, {}) {}  // delegates: empty strides and dim_names
+
+template <typename TYPE>
+NumericTensor<TYPE>::NumericTensor(const std::shared_ptr<Buffer>& data,
+                                   const std::vector<int64_t>& shape,
+                                   const std::vector<int64_t>& strides)
+    : NumericTensor(data, shape, strides, {}) {}  // delegates: empty dim_names
+
+template <typename TYPE>  // delegation target of the two constructors above
+NumericTensor<TYPE>::NumericTensor(const std::shared_ptr<Buffer>& data,
+                                   const std::vector<int64_t>& shape,
+                                   const std::vector<int64_t>& strides,
+                                   const std::vector<std::string>& dim_names)
+    : Tensor(TypeTraits<TYPE>::type_singleton(), data, shape, strides, dim_names) {}
+
+template <typename TYPE>
+int64_t NumericTensor<TYPE>::CalculateValueOffset(
+    const std::vector<int64_t>& index) const {
+  int64_t offset = 0;
+  if (strides_.size() > 0) {
+    for (size_t i = 0; i < index.size(); ++i) {
+      offset += index[i] * strides_[i];
+    }
+  } else {
+    for (size_t i = 0; i < index.size(); ++i) {
+      offset = index[i] + offset * shape_[i];
+    }
+    offset *= static_cast<int64_t>(sizeof(value_type));
+  }
+
+  return offset;
+}
+
+// ----------------------------------------------------------------------
+// Instantiate templates (the member definitions above live in this .cc file)
+
+template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt8Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt16Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt32Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt64Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<Int8Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<Int16Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<Int32Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<Int64Type>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<HalfFloatType>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<FloatType>;
+template class ARROW_TEMPLATE_EXPORT NumericTensor<DoubleType>;
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h
index 699dc039309..a9b5df81fa1 100644
--- a/cpp/src/arrow/tensor.h
+++ b/cpp/src/arrow/tensor.h
@@ -62,7 +62,7 @@ class ARROW_EXPORT Tensor {
   Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
          const std::vector<int64_t>& shape, const std::vector<int64_t>& strides);
 
-  /// Constructor with strides and dimension names
+  /// Constructor with non-negative strides and dimension names
   Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
          const std::vector<int64_t>& shape, const std::vector<int64_t>& strides,
          const std::vector<std::string>& dim_names);
@@ -114,6 +114,34 @@ class ARROW_EXPORT Tensor {
   ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);
 };
 
+template <typename TYPE>  // TYPE must provide a nested c_type (see value_type)
+class ARROW_EXPORT NumericTensor : public Tensor {
+ public:
+  using TypeClass = TYPE;
+  using value_type = typename TypeClass::c_type;
+
+  /// Constructor with no dimension names or strides, data assumed to be row-major
+  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape);
+
+  /// Constructor with non-negative strides
+  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+                const std::vector<int64_t>& strides);
+
+  /// Constructor with non-negative strides and dimension names
+  NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
+                const std::vector<int64_t>& strides,
+                const std::vector<std::string>& dim_names);
+  /// Return the element at `index`; neither the index nor the offset is validated
+  const value_type& Value(const std::vector<int64_t>& index) const {
+    int64_t offset = CalculateValueOffset(index);  // sum of index[i] * strides_[i]
+    const value_type* ptr = reinterpret_cast<const value_type*>(raw_data() + offset);
+    return *ptr;
+  }
+
+ protected:
+  int64_t CalculateValueOffset(const std::vector<int64_t>& index) const;
+};
+
 }  // namespace arrow
 
 #endif  // ARROW_TENSOR_H

From 14fa5279566fb0e49652ae307fb10c2ee545889d Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 24 Oct 2018 17:02:41 +0900
Subject: [PATCH 2/3] Remove needless cases

Tensor's strides_ is always filled, so the fallback branch for empty
strides in NumericTensor<T>::CalculateValueOffset is unnecessary.
---
 cpp/src/arrow/tensor.cc | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc
index e218a525187..589ee995e21 100644
--- a/cpp/src/arrow/tensor.cc
+++ b/cpp/src/arrow/tensor.cc
@@ -147,17 +147,9 @@ template <typename TYPE>
 int64_t NumericTensor<TYPE>::CalculateValueOffset(
     const std::vector<int64_t>& index) const {
   int64_t offset = 0;
-  if (strides_.size() > 0) {
-    for (size_t i = 0; i < index.size(); ++i) {
-      offset += index[i] * strides_[i];
-    }
-  } else {
-    for (size_t i = 0; i < index.size(); ++i) {
-      offset = index[i] + offset * shape_[i];
-    }
-    offset *= static_cast<int64_t>(sizeof(value_type));
+  for (size_t i = 0; i < index.size(); ++i) {  // index.size() is not range-checked
+    offset += index[i] * strides_[i];
   }
-
   return offset;
 }
 

From 37f0bb4ac40c17c7598e4bf0ac5b4d788b60b905 Mon Sep 17 00:00:00 2001
From: Kenta Murata <mrkn@mrkn.jp>
Date: Wed, 24 Oct 2018 18:04:19 +0900
Subject: [PATCH 3/3] Add tests for column-major strides

---
 cpp/src/arrow/tensor-test.cc | 37 +++++++++++++++++++++++++++++++++++-
 1 file changed, 36 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/tensor-test.cc b/cpp/src/arrow/tensor-test.cc
index 043ebe047a8..a437e6db5ad 100644
--- a/cpp/src/arrow/tensor-test.cc
+++ b/cpp/src/arrow/tensor-test.cc
@@ -127,7 +127,7 @@ TEST(TestNumericTensor, ElementAccess) {
   ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
 }
 
-TEST(TestNumericTensor, ElementAccessWithStrides) {
+TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
   std::vector<int64_t> shape = {3, 4};
 
   const int64_t i64_size = sizeof(int64_t);
@@ -138,6 +138,8 @@ TEST(TestNumericTensor, ElementAccessWithStrides) {
   NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
 
   ASSERT_EQ(1, t_i64.Value({0, 0}));
+  ASSERT_EQ(2, t_i64.Value({0, 1}));
+  ASSERT_EQ(4, t_i64.Value({0, 3}));
   ASSERT_EQ(5, t_i64.Value({1, 0}));
   ASSERT_EQ(6, t_i64.Value({1, 1}));
   ASSERT_EQ(11, t_i64.Value({2, 2}));
@@ -151,6 +153,39 @@ TEST(TestNumericTensor, ElementAccessWithStrides) {
   NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
 
   ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
+  ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
+  ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
+  ASSERT_EQ(5.1f, t_f32.Value({1, 0}));
+  ASSERT_EQ(6.1f, t_f32.Value({1, 1}));
+  ASSERT_EQ(11.1f, t_f32.Value({2, 2}));
+}
+
+TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
+  std::vector<int64_t> shape = {3, 4};
+  // Column-major storage: each column is 3 values followed by one zero pad slot.
+  const int64_t i64_size = sizeof(int64_t);
+  std::vector<int64_t> values_i64 = {1, 5, 9, 0, 2, 6, 10, 0, 3, 7, 11, 0, 4, 8, 12, 0};
+  std::vector<int64_t> strides_i64 = {i64_size, i64_size * 4};  // bytes; 4 slots/column
+  std::shared_ptr<Buffer> buffer_i64(Buffer::Wrap(values_i64));
+  NumericTensor<Int64Type> t_i64(buffer_i64, shape, strides_i64);
+
+  ASSERT_EQ(1, t_i64.Value({0, 0}));
+  ASSERT_EQ(2, t_i64.Value({0, 1}));
+  ASSERT_EQ(4, t_i64.Value({0, 3}));
+  ASSERT_EQ(5, t_i64.Value({1, 0}));
+  ASSERT_EQ(6, t_i64.Value({1, 1}));
+  ASSERT_EQ(11, t_i64.Value({2, 2}));
+  // Same column-major layout with float elements.
+  const int64_t f32_size = sizeof(float);
+  std::vector<float> values_f32 = {1.1f, 5.1f, 9.1f,  0.0f, 2.1f, 6.1f, 10.1f, 0.0f,
+                                   3.1f, 7.1f, 11.1f, 0.0f, 4.1f, 8.1f, 12.1f, 0.0f};
+  std::vector<int64_t> strides_f32 = {f32_size, f32_size * 4};  // bytes; 4 slots/column
+  std::shared_ptr<Buffer> buffer_f32(Buffer::Wrap(values_f32));
+  NumericTensor<FloatType> t_f32(buffer_f32, shape, strides_f32);
+
+  ASSERT_EQ(1.1f, t_f32.Value({0, 0}));
+  ASSERT_EQ(2.1f, t_f32.Value({0, 1}));
+  ASSERT_EQ(4.1f, t_f32.Value({0, 3}));
   ASSERT_EQ(5.1f, t_f32.Value({1, 0}));
   ASSERT_EQ(6.1f, t_f32.Value({1, 1}));
   ASSERT_EQ(11.1f, t_f32.Value({2, 2}));