From 16464615e3469d6149684fbd722bed1deec550c9 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Fri, 12 Oct 2018 17:41:21 +0900 Subject: [PATCH 1/3] Introduce NumericTensor class This commit defines the new NumericTensor class as a subclass of Tensor class. NumericTensor extends Tensor class by adding a member function to access element values in a tensor. --- cpp/src/arrow/tensor-test.cc | 52 ++++++++++++++++++++++++++++++++++ cpp/src/arrow/tensor.cc | 55 ++++++++++++++++++++++++++++++++++++ cpp/src/arrow/tensor.h | 30 +++++++++++++++++++- 3 files changed, 136 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/tensor-test.cc b/cpp/src/arrow/tensor-test.cc index ee8205136f8..043ebe047a8 100644 --- a/cpp/src/arrow/tensor-test.cc +++ b/cpp/src/arrow/tensor-test.cc @@ -104,4 +104,56 @@ TEST(TestTensor, ZeroDimensionalTensor) { ASSERT_EQ(t.strides().size(), 1); } +TEST(TestNumericTensor, ElementAccess) { + std::vector shape = {3, 4}; + + std::vector values_i64 = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + std::shared_ptr buffer_i64(Buffer::Wrap(values_i64)); + NumericTensor t_i64(buffer_i64, shape); + + ASSERT_EQ(1, t_i64.Value({0, 0})); + ASSERT_EQ(5, t_i64.Value({1, 0})); + ASSERT_EQ(6, t_i64.Value({1, 1})); + ASSERT_EQ(11, t_i64.Value({2, 2})); + + std::vector values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 5.1f, 6.1f, + 7.1f, 8.1f, 9.1f, 10.1f, 11.1f, 12.1f}; + std::shared_ptr buffer_f32(Buffer::Wrap(values_f32)); + NumericTensor t_f32(buffer_f32, shape); + + ASSERT_EQ(1.1f, t_f32.Value({0, 0})); + ASSERT_EQ(5.1f, t_f32.Value({1, 0})); + ASSERT_EQ(6.1f, t_f32.Value({1, 1})); + ASSERT_EQ(11.1f, t_f32.Value({2, 2})); +} + +TEST(TestNumericTensor, ElementAccessWithStrides) { + std::vector shape = {3, 4}; + + const int64_t i64_size = sizeof(int64_t); + std::vector values_i64 = {1, 2, 3, 4, 0, 0, 5, 6, 7, + 8, 0, 0, 9, 10, 11, 12, 0, 0}; + std::vector strides_i64 = {i64_size * 6, i64_size}; + std::shared_ptr buffer_i64(Buffer::Wrap(values_i64)); + NumericTensor t_i64(buffer_i64, shape, strides_i64); + + ASSERT_EQ(1, t_i64.Value({0, 0})); + ASSERT_EQ(5, t_i64.Value({1, 0})); + ASSERT_EQ(6, t_i64.Value({1, 1})); + ASSERT_EQ(11, t_i64.Value({2, 2})); + + const int64_t f32_size = sizeof(float); + std::vector values_f32 = {1.1f, 2.1f, 3.1f, 4.1f, 0.0f, 0.0f, + 5.1f, 6.1f, 7.1f, 8.1f, 0.0f, 0.0f, + 9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f}; + std::vector strides_f32 = {f32_size * 6, f32_size}; + std::shared_ptr buffer_f32(Buffer::Wrap(values_f32)); + NumericTensor t_f32(buffer_f32, shape, strides_f32); + + ASSERT_EQ(1.1f, t_f32.Value({0, 0})); + ASSERT_EQ(5.1f, t_f32.Value({1, 0})); + ASSERT_EQ(6.1f, t_f32.Value({1, 1})); + ASSERT_EQ(11.1f, t_f32.Value({2, 2})); +} + } // namespace arrow diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index 5b44a031bee..e218a525187 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -26,6 +26,7 @@ #include "arrow/compare.h" #include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging.h" @@ -121,4 +122,58 @@ Type::type Tensor::type_id() const { return type_->id(); } bool Tensor::Equals(const Tensor& other) const { return TensorEquals(*this, other); } +// ---------------------------------------------------------------------- +// NumericTensor + +template +NumericTensor::NumericTensor(const std::shared_ptr& data, + const std::vector& shape) + : NumericTensor(data, shape, {}, {}) {} + +template +NumericTensor::NumericTensor(const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides) + : NumericTensor(data, shape, strides, {}) {} + +template +NumericTensor::NumericTensor(const std::shared_ptr& data, + const std::vector& shape, + const std::vector& strides, + const std::vector& dim_names) + : Tensor(TypeTraits::type_singleton(), data, shape, strides, dim_names) {} + +template +int64_t NumericTensor::CalculateValueOffset( + const std::vector& index) const { + int64_t offset = 0; + if (strides_.size() > 0) { + for (size_t i = 0; i < index.size(); ++i) { + offset += index[i] * strides_[i]; + } + } else { + for (size_t i = 0; i < index.size(); ++i) { + offset = index[i] + offset * shape_[i]; + } + offset *= static_cast(sizeof(value_type)); + } + + return offset; +} + +// ---------------------------------------------------------------------- +// Instantiate templates + +template class ARROW_TEMPLATE_EXPORT NumericTensor; +template class ARROW_TEMPLATE_EXPORT NumericTensor; +template class ARROW_TEMPLATE_EXPORT NumericTensor; +template class ARROW_TEMPLATE_EXPORT NumericTensor; +template class ARROW_TEMPLATE_EXPORT NumericTensor; +template class ARROW_TEMPLATE_EXPORT NumericTensor; +template class ARROW_TEMPLATE_EXPORT NumericTensor; +template class ARROW_TEMPLATE_EXPORT NumericTensor; +template class ARROW_TEMPLATE_EXPORT NumericTensor; +template class ARROW_TEMPLATE_EXPORT NumericTensor; +template class ARROW_TEMPLATE_EXPORT NumericTensor; + } // namespace arrow diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index 699dc039309..a9b5df81fa1 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -62,7 +62,7 @@ class ARROW_EXPORT Tensor { Tensor(const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape, const std::vector& strides); - /// Constructor with strides and dimension names + /// Constructor with non-negative strides and dimension names Tensor(const std::shared_ptr& type, const std::shared_ptr& data, const std::vector& shape, const std::vector& strides, const std::vector& dim_names); @@ -114,6 +114,34 @@ class ARROW_EXPORT Tensor { ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor); }; +template +class ARROW_EXPORT NumericTensor : public Tensor { + public: + using TypeClass = TYPE; + using value_type = typename TypeClass::c_type; + + /// Constructor with no dimension names or strides, data assumed to be row-major + NumericTensor(const std::shared_ptr& data, const std::vector& shape); + + /// Constructor with non-negative strides + NumericTensor(const std::shared_ptr& data, const std::vector& shape, + const std::vector& strides); + + /// Constructor with non-negative strides and dimension names + NumericTensor(const std::shared_ptr& data, const std::vector& shape, + const std::vector& strides, + const std::vector& dim_names); + + const value_type& Value(const std::vector& index) const { + int64_t offset = CalculateValueOffset(index); + const value_type* ptr = reinterpret_cast(raw_data() + offset); + return *ptr; + } + + protected: + int64_t CalculateValueOffset(const std::vector& index) const; +}; + } // namespace arrow #endif // ARROW_TENSOR_H From 14fa5279566fb0e49652ae307fb10c2ee545889d Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Wed, 24 Oct 2018 17:02:41 +0900 Subject: [PATCH 2/3] Remove needless cases Tensor's strides_ is always filled. --- cpp/src/arrow/tensor.cc | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index e218a525187..589ee995e21 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -147,17 +147,9 @@ template int64_t NumericTensor::CalculateValueOffset( const std::vector& index) const { int64_t offset = 0; - if (strides_.size() > 0) { - for (size_t i = 0; i < index.size(); ++i) { - offset += index[i] * strides_[i]; - } - } else { - for (size_t i = 0; i < index.size(); ++i) { - offset = index[i] + offset * shape_[i]; - } - offset *= static_cast(sizeof(value_type)); + for (size_t i = 0; i < index.size(); ++i) { + offset += index[i] * strides_[i]; } - return offset; } From 37f0bb4ac40c17c7598e4bf0ac5b4d788b60b905 Mon Sep 17 00:00:00 2001 From: Kenta Murata Date: Wed, 24 Oct 2018 18:04:19 +0900 Subject: [PATCH 3/3] Add tests for column-major strides --- cpp/src/arrow/tensor-test.cc | 37 +++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/tensor-test.cc b/cpp/src/arrow/tensor-test.cc index 043ebe047a8..a437e6db5ad 100644 --- a/cpp/src/arrow/tensor-test.cc +++ b/cpp/src/arrow/tensor-test.cc @@ -127,7 +127,7 @@ TEST(TestNumericTensor, ElementAccess) { ASSERT_EQ(11.1f, t_f32.Value({2, 2})); } -TEST(TestNumericTensor, ElementAccessWithStrides) { +TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) { std::vector shape = {3, 4}; const int64_t i64_size = sizeof(int64_t); @@ -138,6 +138,8 @@ TEST(TestNumericTensor, ElementAccessWithStrides) { NumericTensor t_i64(buffer_i64, shape, strides_i64); ASSERT_EQ(1, t_i64.Value({0, 0})); + ASSERT_EQ(2, t_i64.Value({0, 1})); + ASSERT_EQ(4, t_i64.Value({0, 3})); ASSERT_EQ(5, t_i64.Value({1, 0})); ASSERT_EQ(6, t_i64.Value({1, 1})); ASSERT_EQ(11, t_i64.Value({2, 2})); @@ -151,6 +153,39 @@ TEST(TestNumericTensor, ElementAccessWithStrides) { NumericTensor t_f32(buffer_f32, shape, strides_f32); ASSERT_EQ(1.1f, t_f32.Value({0, 0})); + ASSERT_EQ(2.1f, t_f32.Value({0, 1})); + ASSERT_EQ(4.1f, t_f32.Value({0, 3})); + ASSERT_EQ(5.1f, t_f32.Value({1, 0})); + ASSERT_EQ(6.1f, t_f32.Value({1, 1})); + ASSERT_EQ(11.1f, t_f32.Value({2, 2})); +} + +TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) { + std::vector shape = {3, 4}; + + const int64_t i64_size = sizeof(int64_t); + std::vector values_i64 = {1, 5, 9, 0, 2, 6, 10, 0, 3, 7, 11, 0, 4, 8, 12, 0}; + std::vector strides_i64 = {i64_size, i64_size * 4}; + std::shared_ptr buffer_i64(Buffer::Wrap(values_i64)); + NumericTensor t_i64(buffer_i64, shape, strides_i64); + + ASSERT_EQ(1, t_i64.Value({0, 0})); + ASSERT_EQ(2, t_i64.Value({0, 1})); + ASSERT_EQ(4, t_i64.Value({0, 3})); + ASSERT_EQ(5, t_i64.Value({1, 0})); + ASSERT_EQ(6, t_i64.Value({1, 1})); + ASSERT_EQ(11, t_i64.Value({2, 2})); + + const int64_t f32_size = sizeof(float); + std::vector values_f32 = {1.1f, 5.1f, 9.1f, 0.0f, 2.1f, 6.1f, 10.1f, 0.0f, + 3.1f, 7.1f, 11.1f, 0.0f, 4.1f, 8.1f, 12.1f, 0.0f}; + std::vector strides_f32 = {f32_size, f32_size * 4}; + std::shared_ptr buffer_f32(Buffer::Wrap(values_f32)); + NumericTensor t_f32(buffer_f32, shape, strides_f32); + + ASSERT_EQ(1.1f, t_f32.Value({0, 0})); + ASSERT_EQ(2.1f, t_f32.Value({0, 1})); + ASSERT_EQ(4.1f, t_f32.Value({0, 3})); ASSERT_EQ(5.1f, t_f32.Value({1, 0})); ASSERT_EQ(6.1f, t_f32.Value({1, 1})); ASSERT_EQ(11.1f, t_f32.Value({2, 2}));