Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions cpp/src/arrow/tensor-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,91 @@ TEST(TestTensor, ZeroDimensionalTensor) {
ASSERT_EQ(t.strides().size(), 1);
}

// Reads individual elements through Value() from 3x4 tensors that are built
// from a buffer and a shape only (no strides are passed to the constructor).
TEST(TestNumericTensor, ElementAccess) {
  const std::vector<int64_t> dims = {3, 4};

  // --- int64 elements ---
  std::vector<int64_t> int_values = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12};
  std::shared_ptr<Buffer> int_buffer(Buffer::Wrap(int_values));
  NumericTensor<Int64Type> int_tensor(int_buffer, dims);

  ASSERT_EQ(1, int_tensor.Value({0, 0}));
  ASSERT_EQ(5, int_tensor.Value({1, 0}));
  ASSERT_EQ(6, int_tensor.Value({1, 1}));
  ASSERT_EQ(11, int_tensor.Value({2, 2}));

  // --- float elements ---
  // Exact comparison is intended: the values are read back unmodified.
  std::vector<float> float_values = {1.1f, 2.1f, 3.1f, 4.1f,  5.1f,  6.1f,
                                     7.1f, 8.1f, 9.1f, 10.1f, 11.1f, 12.1f};
  std::shared_ptr<Buffer> float_buffer(Buffer::Wrap(float_values));
  NumericTensor<FloatType> float_tensor(float_buffer, dims);

  ASSERT_EQ(1.1f, float_tensor.Value({0, 0}));
  ASSERT_EQ(5.1f, float_tensor.Value({1, 0}));
  ASSERT_EQ(6.1f, float_tensor.Value({1, 1}));
  ASSERT_EQ(11.1f, float_tensor.Value({2, 2}));
}

// Reads individual elements through Value() from 3x4 tensors whose rows are
// stored with two trailing padding elements each, so the explicit row stride
// is six elements wide while the column stride is one element wide.
TEST(TestNumericTensor, ElementAccessWithRowMajorStrides) {
  const std::vector<int64_t> dims = {3, 4};

  // --- int64 elements ---
  constexpr int64_t kInt64Width = sizeof(int64_t);
  std::vector<int64_t> int_values = {1, 2, 3,  4,  0,  0,  5, 6, 7,
                                     8, 0, 0,  9,  10, 11, 12, 0, 0};
  std::vector<int64_t> int_strides = {kInt64Width * 6, kInt64Width};
  std::shared_ptr<Buffer> int_buffer(Buffer::Wrap(int_values));
  NumericTensor<Int64Type> int_tensor(int_buffer, dims, int_strides);

  ASSERT_EQ(1, int_tensor.Value({0, 0}));
  ASSERT_EQ(2, int_tensor.Value({0, 1}));
  ASSERT_EQ(4, int_tensor.Value({0, 3}));
  ASSERT_EQ(5, int_tensor.Value({1, 0}));
  ASSERT_EQ(6, int_tensor.Value({1, 1}));
  ASSERT_EQ(11, int_tensor.Value({2, 2}));

  // --- float elements ---
  constexpr int64_t kFloatWidth = sizeof(float);
  std::vector<float> float_values = {1.1f, 2.1f,  3.1f,  4.1f,  0.0f, 0.0f,
                                     5.1f, 6.1f,  7.1f,  8.1f,  0.0f, 0.0f,
                                     9.1f, 10.1f, 11.1f, 12.1f, 0.0f, 0.0f};
  std::vector<int64_t> float_strides = {kFloatWidth * 6, kFloatWidth};
  std::shared_ptr<Buffer> float_buffer(Buffer::Wrap(float_values));
  NumericTensor<FloatType> float_tensor(float_buffer, dims, float_strides);

  ASSERT_EQ(1.1f, float_tensor.Value({0, 0}));
  ASSERT_EQ(2.1f, float_tensor.Value({0, 1}));
  ASSERT_EQ(4.1f, float_tensor.Value({0, 3}));
  ASSERT_EQ(5.1f, float_tensor.Value({1, 0}));
  ASSERT_EQ(6.1f, float_tensor.Value({1, 1}));
  ASSERT_EQ(11.1f, float_tensor.Value({2, 2}));
}

// Reads individual elements through Value() from 3x4 tensors whose columns
// are stored one after another with one trailing padding element each, so the
// row stride is one element wide and the column stride is four elements wide.
TEST(TestNumericTensor, ElementAccessWithColumnMajorStrides) {
  const std::vector<int64_t> dims = {3, 4};

  // --- int64 elements ---
  constexpr int64_t kInt64Width = sizeof(int64_t);
  std::vector<int64_t> int_values = {1, 5, 9,  0, 2, 6, 10, 0,
                                     3, 7, 11, 0, 4, 8, 12, 0};
  std::vector<int64_t> int_strides = {kInt64Width, kInt64Width * 4};
  std::shared_ptr<Buffer> int_buffer(Buffer::Wrap(int_values));
  NumericTensor<Int64Type> int_tensor(int_buffer, dims, int_strides);

  ASSERT_EQ(1, int_tensor.Value({0, 0}));
  ASSERT_EQ(2, int_tensor.Value({0, 1}));
  ASSERT_EQ(4, int_tensor.Value({0, 3}));
  ASSERT_EQ(5, int_tensor.Value({1, 0}));
  ASSERT_EQ(6, int_tensor.Value({1, 1}));
  ASSERT_EQ(11, int_tensor.Value({2, 2}));

  // --- float elements ---
  constexpr int64_t kFloatWidth = sizeof(float);
  std::vector<float> float_values = {1.1f, 5.1f, 9.1f,  0.0f, 2.1f, 6.1f, 10.1f, 0.0f,
                                     3.1f, 7.1f, 11.1f, 0.0f, 4.1f, 8.1f, 12.1f, 0.0f};
  std::vector<int64_t> float_strides = {kFloatWidth, kFloatWidth * 4};
  std::shared_ptr<Buffer> float_buffer(Buffer::Wrap(float_values));
  NumericTensor<FloatType> float_tensor(float_buffer, dims, float_strides);

  ASSERT_EQ(1.1f, float_tensor.Value({0, 0}));
  ASSERT_EQ(2.1f, float_tensor.Value({0, 1}));
  ASSERT_EQ(4.1f, float_tensor.Value({0, 3}));
  ASSERT_EQ(5.1f, float_tensor.Value({1, 0}));
  ASSERT_EQ(6.1f, float_tensor.Value({1, 1}));
  ASSERT_EQ(11.1f, float_tensor.Value({2, 2}));
}

} // namespace arrow
47 changes: 47 additions & 0 deletions cpp/src/arrow/tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

#include "arrow/compare.h"
#include "arrow/type.h"
#include "arrow/type_traits.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/logging.h"

Expand Down Expand Up @@ -121,4 +122,50 @@ Type::type Tensor::type_id() const { return type_->id(); }

bool Tensor::Equals(const Tensor& other) const { return TensorEquals(*this, other); }

// ----------------------------------------------------------------------
// NumericTensor

// Shape-only constructor: forwards to the four-argument constructor with
// empty strides and empty dimension names.
template <typename TYPE>
NumericTensor<TYPE>::NumericTensor(const std::shared_ptr<Buffer>& data,
                                   const std::vector<int64_t>& shape)
    : NumericTensor(data, shape, std::vector<int64_t>(), std::vector<std::string>()) {
}

// Shape-and-strides constructor: forwards to the four-argument constructor
// with empty dimension names.
template <typename TYPE>
NumericTensor<TYPE>::NumericTensor(const std::shared_ptr<Buffer>& data,
                                   const std::vector<int64_t>& shape,
                                   const std::vector<int64_t>& strides)
    : NumericTensor(data, shape, strides, std::vector<std::string>()) {
}

// Full constructor: hands everything to the Tensor base class, supplying the
// data type obtained from TypeTraits<TYPE>::type_singleton().
template <typename TYPE>
NumericTensor<TYPE>::NumericTensor(const std::shared_ptr<Buffer>& data,
                                   const std::vector<int64_t>& shape,
                                   const std::vector<int64_t>& strides,
                                   const std::vector<std::string>& dim_names)
    : Tensor(TypeTraits<TYPE>::type_singleton(),
             data,
             shape,
             strides,
             dim_names) {
}

/// \brief Compute the offset of the element addressed by \p index.
///
/// The result is the sum of index[i] * strides_[i] over every entry of
/// \p index, i.e. it is expressed in the same units as strides_.
///
/// \param[in] index one coordinate per dimension; must not contain more
///            entries than strides_
/// \return the accumulated offset
template <typename TYPE>
int64_t NumericTensor<TYPE>::CalculateValueOffset(
    const std::vector<int64_t>& index) const {
  // An index with more entries than strides_ would read past the end of
  // strides_ in the loop below; catch that in debug builds.
  DCHECK_LE(index.size(), strides_.size());

  int64_t offset = 0;
  for (size_t i = 0; i < index.size(); ++i) {
    offset += index[i] * strides_[i];
  }
  return offset;
}

// ----------------------------------------------------------------------
// Instantiate templates

// Signed integer element types
template class ARROW_TEMPLATE_EXPORT NumericTensor<Int8Type>;
template class ARROW_TEMPLATE_EXPORT NumericTensor<Int16Type>;
template class ARROW_TEMPLATE_EXPORT NumericTensor<Int32Type>;
template class ARROW_TEMPLATE_EXPORT NumericTensor<Int64Type>;

// Unsigned integer element types
template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt8Type>;
template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt16Type>;
template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt32Type>;
template class ARROW_TEMPLATE_EXPORT NumericTensor<UInt64Type>;

// Floating-point element types
template class ARROW_TEMPLATE_EXPORT NumericTensor<HalfFloatType>;
template class ARROW_TEMPLATE_EXPORT NumericTensor<FloatType>;
template class ARROW_TEMPLATE_EXPORT NumericTensor<DoubleType>;

} // namespace arrow
30 changes: 29 additions & 1 deletion cpp/src/arrow/tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class ARROW_EXPORT Tensor {
Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
const std::vector<int64_t>& shape, const std::vector<int64_t>& strides);

/// Constructor with strides and dimension names
/// Constructor with non-negative strides and dimension names
Tensor(const std::shared_ptr<DataType>& type, const std::shared_ptr<Buffer>& data,
const std::vector<int64_t>& shape, const std::vector<int64_t>& strides,
const std::vector<std::string>& dim_names);
Expand Down Expand Up @@ -114,6 +114,34 @@ class ARROW_EXPORT Tensor {
ARROW_DISALLOW_COPY_AND_ASSIGN(Tensor);
};

template <typename TYPE>
class ARROW_EXPORT NumericTensor : public Tensor {
public:
using TypeClass = TYPE;
using value_type = typename TypeClass::c_type;

/// Constructor with no dimension names or strides, data assumed to be row-major
NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape);

/// Constructor with non-negative strides
NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
const std::vector<int64_t>& strides);

/// Constructor with non-negative strides and dimension names
NumericTensor(const std::shared_ptr<Buffer>& data, const std::vector<int64_t>& shape,
const std::vector<int64_t>& strides,
const std::vector<std::string>& dim_names);

const value_type& Value(const std::vector<int64_t>& index) const {
int64_t offset = CalculateValueOffset(index);
const value_type* ptr = reinterpret_cast<const value_type*>(raw_data() + offset);
return *ptr;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On some CPU architectures, this assumes the data is naturally aligned. It probably doesn't matter for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One way to tackle this is to make CalculateValueOffset return an offset in units of TYPE instead of bytes. That would also make it easier to support types without a fixed byte width in the future (and might also fix the problem with BOOL).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed this function to return an offset in units of TYPE.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please undo the change (sorry). The strides are expressed in bytes, so the offset has to be calculated in bytes as well. If you divide the byte offset by the item size, there is no guarantee that the remainder is zero (though that will be the common case).

In any case, the change didn't fix the issue, as raw_data() could be misaligned.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see.

}

protected:
int64_t CalculateValueOffset(const std::vector<int64_t>& index) const;
};

} // namespace arrow

#endif // ARROW_TENSOR_H