Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,8 @@ set(ARROW_SRCS
table_builder.cc
tensor.cc
tensor/coo_converter.cc
tensor/csc_converter.cc
tensor/csf_converter.cc
tensor/csr_converter.cc
tensor/csx_converter.cc
type.cc
visitor.cc
c/bridge.cc
Expand Down Expand Up @@ -603,6 +602,7 @@ add_subdirectory(testing)
add_subdirectory(array)
add_subdirectory(c)
add_subdirectory(io)
add_subdirectory(tensor)
add_subdirectory(util)
add_subdirectory(vendored)

Expand Down
13 changes: 5 additions & 8 deletions cpp/src/arrow/compare.cc
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,7 @@ class RangeEqualsVisitor {
};

static bool IsEqualPrimitive(const PrimitiveArray& left, const PrimitiveArray& right) {
const auto& size_meta = checked_cast<const FixedWidthType&>(*left.type());
const int byte_width = size_meta.bit_width() / CHAR_BIT;
const int byte_width = internal::GetByteWidth(*left.type());

const uint8_t* left_data = nullptr;
const uint8_t* right_data = nullptr;
Expand Down Expand Up @@ -1079,11 +1078,10 @@ bool IntegerTensorEquals(const Tensor& left, const Tensor& right) {
if (!(left_row_major_p && right_row_major_p) &&
!(left_column_major_p && right_column_major_p)) {
const auto& type = checked_cast<const FixedWidthType&>(*left.type());
are_equal =
StridedIntegerTensorContentEquals(0, 0, 0, type.bit_width() / 8, left, right);
are_equal = StridedIntegerTensorContentEquals(0, 0, 0, internal::GetByteWidth(type),
left, right);
} else {
const auto& size_meta = checked_cast<const FixedWidthType&>(*left.type());
const int byte_width = size_meta.bit_width() / CHAR_BIT;
const int byte_width = internal::GetByteWidth(*left.type());
DCHECK_GT(byte_width, 0);

const uint8_t* left_data = left.data()->data();
Expand Down Expand Up @@ -1243,8 +1241,7 @@ struct SparseTensorEqualsImpl<SparseIndexType, SparseIndexType> {
return false;
}

const auto& size_meta = checked_cast<const FixedWidthType&>(*left.type());
const int byte_width = size_meta.bit_width() / CHAR_BIT;
const int byte_width = internal::GetByteWidth(*left.type());
DCHECK_GT(byte_width, 0);

const uint8_t* left_data = left.data()->data();
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/compute/kernels/vector_selection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/table.h"
#include "arrow/type.h"
#include "arrow/util/bit_block_counter.h"
#include "arrow/util/bit_util.h"
#include "arrow/util/bitmap_ops.h"
Expand All @@ -50,6 +51,7 @@ using internal::CheckIndexBounds;
using internal::CopyBitmap;
using internal::CountSetBits;
using internal::GetArrayView;
using internal::GetByteWidth;
using internal::OptionalBitBlockCounter;
using internal::OptionalBitIndexer;

Expand Down Expand Up @@ -1460,8 +1462,7 @@ struct Selection {
Status ExecTake() {
RETURN_NOT_OK(this->validity_builder.Reserve(output_length));
RETURN_NOT_OK(Init());
int index_width =
checked_cast<const FixedWidthType&>(*this->selection->type).bit_width() / 8;
int index_width = GetByteWidth(*this->selection->type);

// CRTP dispatch here
switch (index_width) {
Expand Down
9 changes: 4 additions & 5 deletions cpp/src/arrow/ipc/metadata_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ namespace arrow {

namespace flatbuf = org::apache::arrow::flatbuf;
using internal::checked_cast;
using internal::GetByteWidth;

namespace ipc {
namespace internal {
Expand Down Expand Up @@ -1003,8 +1004,8 @@ Status MakeSparseTensorIndexCSF(FBB& fbb, const SparseCSFIndex& sparse_index,
auto indices_type_offset = flatbuf::CreateInt(fbb, indices_value_type.bit_width(),
indices_value_type.is_signed());

const int64_t indptr_elem_size = indptr_value_type.bit_width() / 8;
const int64_t indices_elem_size = indices_value_type.bit_width() / 8;
const int64_t indptr_elem_size = GetByteWidth(indptr_value_type);
const int64_t indices_elem_size = GetByteWidth(indices_value_type);

int64_t offset = 0;
std::vector<flatbuf::Buffer> indptr, indices;
Expand Down Expand Up @@ -1169,9 +1170,7 @@ Result<std::shared_ptr<Buffer>> WriteTensorMessage(const Tensor& tensor,
using TensorOffset = flatbuffers::Offset<flatbuf::Tensor>;

FBB fbb;

const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
const int elem_size = type.bit_width() / 8;
const int elem_size = GetByteWidth(*tensor.type());

flatbuf::Type fb_type_type;
Offset fb_type;
Expand Down
28 changes: 13 additions & 15 deletions cpp/src/arrow/ipc/read_write_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
namespace arrow {

using internal::checked_cast;
using internal::GetByteWidth;

namespace ipc {
namespace test {
Expand Down Expand Up @@ -1611,9 +1612,7 @@ class TestTensorRoundTrip : public ::testing::Test, public IpcTestFixture {
void CheckTensorRoundTrip(const Tensor& tensor) {
int32_t metadata_length;
int64_t body_length;

const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
const int elem_size = type.bit_width() / 8;
const int elem_size = GetByteWidth(*tensor.type());

ASSERT_OK(mmap_->Seek(0));

Expand Down Expand Up @@ -1687,8 +1686,7 @@ class TestSparseTensorRoundTrip : public ::testing::Test, public IpcTestFixture
void TearDown() { IpcTestFixture::TearDown(); }

void CheckSparseCOOTensorRoundTrip(const SparseCOOTensor& sparse_tensor) {
const auto& type = checked_cast<const FixedWidthType&>(*sparse_tensor.type());
const int elem_size = type.bit_width() / 8;
const int elem_size = GetByteWidth(*sparse_tensor.type());
const int index_elem_size = sizeof(typename IndexValueType::c_type);

int32_t metadata_length;
Expand Down Expand Up @@ -1728,8 +1726,7 @@ class TestSparseTensorRoundTrip : public ::testing::Test, public IpcTestFixture
std::is_same<SparseIndexType, SparseCSCIndex>::value,
"SparseIndexType must be either SparseCSRIndex or SparseCSCIndex");

const auto& type = checked_cast<const FixedWidthType&>(*sparse_tensor.type());
const int elem_size = type.bit_width() / 8;
const int elem_size = GetByteWidth(*sparse_tensor.type());
const int index_elem_size = sizeof(typename IndexValueType::c_type);

int32_t metadata_length;
Expand Down Expand Up @@ -1770,8 +1767,7 @@ class TestSparseTensorRoundTrip : public ::testing::Test, public IpcTestFixture
}

void CheckSparseCSFTensorRoundTrip(const SparseCSFTensor& sparse_tensor) {
const auto& type = checked_cast<const FixedWidthType&>(*sparse_tensor.type());
const int elem_size = type.bit_width() / 8;
const int elem_size = GetByteWidth(*sparse_tensor.type());
const int index_elem_size = sizeof(typename IndexValueType::c_type);

int32_t metadata_length;
Expand Down Expand Up @@ -1841,13 +1837,13 @@ class TestSparseTensorRoundTrip : public ::testing::Test, public IpcTestFixture
}

template <typename ValueType>
std::shared_ptr<SparseCOOTensor> MakeSparseCOOTensor(
Result<std::shared_ptr<SparseCOOTensor>> MakeSparseCOOTensor(
const std::shared_ptr<SparseCOOIndex>& si, std::vector<ValueType>& sparse_values,
const std::vector<int64_t>& shape,
const std::vector<std::string>& dim_names = {}) const {
auto data = Buffer::Wrap(sparse_values);
return std::make_shared<SparseCOOTensor>(si, CTypeTraits<ValueType>::type_singleton(),
data, shape, dim_names);
return SparseCOOTensor::Make(si, CTypeTraits<ValueType>::type_singleton(), data,
shape, dim_names);
}
};

Expand Down Expand Up @@ -1895,7 +1891,8 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexRowMajor) {
std::vector<int64_t> shape = {2, 3, 4};
std::vector<std::string> dim_names = {"foo", "bar", "baz"};
std::vector<int64_t> values = {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16};
auto st = this->MakeSparseCOOTensor(si, values, shape, dim_names);
std::shared_ptr<SparseCOOTensor> st;
ASSERT_OK_AND_ASSIGN(st, this->MakeSparseCOOTensor(si, values, shape, dim_names));

this->CheckSparseCOOTensorRoundTrip(*st);
}
Expand Down Expand Up @@ -1942,7 +1939,9 @@ TYPED_TEST_P(TestSparseTensorRoundTrip, WithSparseCOOIndexColumnMajor) {
std::vector<int64_t> shape = {2, 3, 4};
std::vector<std::string> dim_names = {"foo", "bar", "baz"};
std::vector<int64_t> values = {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16};
auto st = this->MakeSparseCOOTensor(si, values, shape, dim_names);

std::shared_ptr<SparseCOOTensor> st;
ASSERT_OK_AND_ASSIGN(st, this->MakeSparseCOOTensor(si, values, shape, dim_names));

this->CheckSparseCOOTensorRoundTrip(*st);
}
Expand Down Expand Up @@ -2023,7 +2022,6 @@ INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt16, TestSparseTensorRoundTrip, UInt16Type
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt32, TestSparseTensorRoundTrip, Int32Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt32, TestSparseTensorRoundTrip, UInt32Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestInt64, TestSparseTensorRoundTrip, Int64Type);
INSTANTIATE_TYPED_TEST_SUITE_P(TestUInt64, TestSparseTensorRoundTrip, UInt64Type);

TEST(TestRecordBatchStreamReader, MalformedInput) {
const std::string empty_str = "";
Expand Down
17 changes: 6 additions & 11 deletions cpp/src/arrow/ipc/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ namespace flatbuf = org::apache::arrow::flatbuf;

using internal::checked_cast;
using internal::checked_pointer_cast;
using internal::GetByteWidth;

namespace ipc {

Expand Down Expand Up @@ -1225,8 +1226,7 @@ Result<std::shared_ptr<SparseIndex>> ReadSparseCOOIndex(

std::shared_ptr<DataType> indices_type;
RETURN_NOT_OK(internal::GetSparseCOOIndexMetadata(sparse_index, &indices_type));
const int64_t indices_elsize =
checked_cast<const IntegerType&>(*indices_type).bit_width() / 8;
const int64_t indices_elsize = GetByteWidth(*indices_type);

auto* indices_buffer = sparse_index->indicesBuffer();
ARROW_ASSIGN_OR_RAISE(auto indices_data,
Expand Down Expand Up @@ -1261,6 +1261,7 @@ Result<std::shared_ptr<SparseIndex>> ReadSparseCSXIndex(
std::shared_ptr<DataType> indptr_type, indices_type;
RETURN_NOT_OK(
internal::GetSparseCSXIndexMetadata(sparse_index, &indptr_type, &indices_type));
const int indptr_byte_width = GetByteWidth(*indptr_type);

auto* indptr_buffer = sparse_index->indptrBuffer();
ARROW_ASSIGN_OR_RAISE(auto indptr_data,
Expand All @@ -1271,19 +1272,15 @@ Result<std::shared_ptr<SparseIndex>> ReadSparseCSXIndex(
file->ReadAt(indices_buffer->offset(), indices_buffer->length()));

std::vector<int64_t> indices_shape({non_zero_length});
const auto indices_minimum_bytes =
indices_shape[0] * checked_pointer_cast<FixedWidthType>(indices_type)->bit_width() /
CHAR_BIT;
const auto indices_minimum_bytes = indices_shape[0] * GetByteWidth(*indices_type);
if (indices_minimum_bytes > indices_buffer->length()) {
return Status::Invalid("shape is inconsistent to the size of indices buffer");
}

switch (sparse_index->compressedAxis()) {
case flatbuf::SparseMatrixCompressedAxis::Row: {
std::vector<int64_t> indptr_shape({shape[0] + 1});
const int64_t indptr_minimum_bytes =
indptr_shape[0] *
checked_pointer_cast<FixedWidthType>(indptr_type)->bit_width() / CHAR_BIT;
const int64_t indptr_minimum_bytes = indptr_shape[0] * indptr_byte_width;
if (indptr_minimum_bytes > indptr_buffer->length()) {
return Status::Invalid("shape is inconsistent to the size of indptr buffer");
}
Expand All @@ -1293,9 +1290,7 @@ Result<std::shared_ptr<SparseIndex>> ReadSparseCSXIndex(
}
case flatbuf::SparseMatrixCompressedAxis::Column: {
std::vector<int64_t> indptr_shape({shape[1] + 1});
const int64_t indptr_minimum_bytes =
indptr_shape[0] *
checked_pointer_cast<FixedWidthType>(indptr_type)->bit_width() / CHAR_BIT;
const int64_t indptr_minimum_bytes = indptr_shape[0] * indptr_byte_width;
if (indptr_minimum_bytes > indptr_buffer->length()) {
return Status::Invalid("shape is inconsistent to the size of indptr buffer");
}
Expand Down
10 changes: 4 additions & 6 deletions cpp/src/arrow/ipc/writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ namespace arrow {
using internal::checked_cast;
using internal::checked_pointer_cast;
using internal::CopyBitmap;
using internal::GetByteWidth;

namespace ipc {

Expand Down Expand Up @@ -306,8 +307,7 @@ class RecordBatchSerializer {
Visit(const T& array) {
std::shared_ptr<Buffer> data = array.values();

const auto& fw_type = checked_cast<const FixedWidthType&>(*array.type());
const int64_t type_width = fw_type.bit_width() / 8;
const int64_t type_width = GetByteWidth(*array.type());
int64_t min_length = PaddedLength(array.length() * type_width);

if (NeedTruncate(array.offset(), data.get(), min_length)) {
Expand Down Expand Up @@ -683,8 +683,7 @@ Status WriteStridedTensorData(int dim_index, int64_t offset, int elem_size,

Status GetContiguousTensor(const Tensor& tensor, MemoryPool* pool,
std::unique_ptr<Tensor>* out) {
const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
const int elem_size = type.bit_width() / 8;
const int elem_size = GetByteWidth(*tensor.type());

ARROW_ASSIGN_OR_RAISE(
auto scratch_space,
Expand All @@ -706,8 +705,7 @@ Status GetContiguousTensor(const Tensor& tensor, MemoryPool* pool,

Status WriteTensor(const Tensor& tensor, io::OutputStream* dst, int32_t* metadata_length,
int64_t* body_length) {
const auto& type = checked_cast<const FixedWidthType&>(*tensor.type());
const int elem_size = type.bit_width() / 8;
const int elem_size = GetByteWidth(*tensor.type());

*body_length = tensor.size() * elem_size;

Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/python/arrow_to_pandas.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class MemoryPool;

using internal::checked_cast;
using internal::CheckIndexBounds;
using internal::GetByteWidth;
using internal::OptionalParallelFor;

// ----------------------------------------------------------------------
Expand Down Expand Up @@ -259,7 +260,7 @@ inline const T* GetPrimitiveValues(const Array& arr) {
if (arr.length() == 0) {
return nullptr;
}
int elsize = checked_cast<const FixedWidthType&>(*arr.type()).bit_width() / 8;
const int elsize = GetByteWidth(*arr.type());
const auto& prim_arr = checked_cast<const PrimitiveArray&>(arr);
return reinterpret_cast<const T*>(prim_arr.values()->data() + arr.offset() * elsize);
}
Expand Down
Loading