Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 39 additions & 6 deletions cpp/src/arrow/array/array_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <vector>

#include "arrow/array/data.h"
#include "arrow/array/statistics.h"
#include "arrow/buffer.h"
#include "arrow/compare.h"
#include "arrow/result.h"
Expand Down Expand Up @@ -232,6 +233,11 @@ class ARROW_EXPORT Array {
/// \return DeviceAllocationType
DeviceAllocationType device_type() const { return data_->device_type(); }

/// \brief Return the statistics of this Array
///
/// In the code visible here the member is only assigned through
/// SetStatistics(), which Init() calls for a non-null argument, so the
/// returned pointer may be null and callers should check it before use.
///
/// \return a const reference to the std::shared_ptr<ArrayStatistics> member
const std::shared_ptr<ArrayStatistics>& statistics() const { return statistics_; }

protected:
Array() = default;
ARROW_DEFAULT_MOVE_AND_ASSIGN(Array);
Expand All @@ -240,7 +246,20 @@ class ARROW_EXPORT Array {
const uint8_t* null_bitmap_data_ = NULLPTR;

/// Protected method for constructors
void SetData(const std::shared_ptr<ArrayData>& data) {
/// \brief Shared initialization sequence for constructors
///
/// Runs ValidateData(), then SetData(), and finally SetStatistics() when
/// \p statistics is non-null. ValidateData() and SetData() are virtual; when
/// Init() is called from a constructor body they resolve to the overrides of
/// the class whose constructor is currently running, not of any class derived
/// from it.
///
/// \param[in] data the ArrayData to validate and install
/// \param[in] statistics optional statistics; a null pointer leaves
/// statistics_ unchanged
void Init(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics) {
// Validation runs first so that checks happen before any member is assigned.
ValidateData(data);
SetData(data);
// Only record statistics that were actually supplied.
if (statistics) {
SetStatistics(statistics);
}
}

/// \brief Validation hook for constructors, invoked by Init() before SetData()
///
/// The base implementation does nothing. The overrides visible in this change
/// check data->type->id() with ARROW_CHECK macros.
///
/// \param[in] data the ArrayData about to be installed
virtual void ValidateData(const std::shared_ptr<ArrayData>& data) {}

/// Protected method for constructors
virtual void SetData(const std::shared_ptr<ArrayData>& data) {
if (data->buffers.size() > 0) {
null_bitmap_data_ = data->GetValuesSafe<uint8_t>(0, /*offset=*/0);
} else {
Expand All @@ -249,6 +268,14 @@ class ARROW_EXPORT Array {
data_ = data;
}

// The statistics for this Array. Default-constructed (null) until
// SetStatistics() assigns it; exposed read-only through statistics().
std::shared_ptr<ArrayStatistics> statistics_;

/// \brief Store \p statistics as the statistics of this Array
///
/// Protected method for constructors. The shared_ptr is copied, so the
/// ArrayStatistics object itself is shared with the caller, not duplicated.
///
/// \param[in] statistics the new value of statistics_
void SetStatistics(const std::shared_ptr<ArrayStatistics>& statistics) {
statistics_ = statistics;
}

private:
ARROW_DISALLOW_COPY_AND_ASSIGN(Array);

Expand Down Expand Up @@ -280,12 +307,15 @@ class ARROW_EXPORT PrimitiveArray : public FlatArray {
protected:
PrimitiveArray() : raw_values_(NULLPTR) {}

void SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
/// Install \p data through Array::SetData(), then cache in raw_values_ the
/// uint8_t pointer returned by GetValuesSafe for index 1, requested with an
/// explicit offset of 0.
void SetData(const std::shared_ptr<ArrayData>& data) override {
Array::SetData(data);
raw_values_ = data->GetValuesSafe<uint8_t>(1, /*offset=*/0);
}

explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
/// \brief Construct from ArrayData, optionally attaching statistics
///
/// Delegates to Init(), which runs ValidateData() and SetData() and records
/// \p statistics only when it is non-null.
///
/// \param[in] data the ArrayData backing this array
/// \param[in] statistics optional statistics (defaults to NULLPTR)
explicit PrimitiveArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR) {
Init(data, statistics);
}

const uint8_t* raw_values_;
};
Expand All @@ -295,11 +325,14 @@ class ARROW_EXPORT NullArray : public FlatArray {
public:
using TypeClass = NullType;

explicit NullArray(const std::shared_ptr<ArrayData>& data) { SetData(data); }
/// \brief Construct from ArrayData, optionally attaching statistics
///
/// Delegates to Init(); the SetData() it invokes is this class's override.
///
/// \param[in] data the ArrayData backing this array
/// \param[in] statistics optional statistics (defaults to NULLPTR)
explicit NullArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR) {
Init(data, statistics);
}
explicit NullArray(int64_t length);

private:
void SetData(const std::shared_ptr<ArrayData>& data) {
void SetData(const std::shared_ptr<ArrayData>& data) override {
null_bitmap_data_ = NULLPTR;
data->null_count = data->length;
data_ = data;
Expand Down
22 changes: 6 additions & 16 deletions cpp/src/arrow/array/array_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,8 @@ namespace arrow {

using internal::checked_cast;

BinaryArray::BinaryArray(const std::shared_ptr<ArrayData>& data) {
void BinaryArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK(is_binary_like(data->type->id()));
SetData(data);
}

BinaryArray::BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
Expand All @@ -45,9 +44,8 @@ BinaryArray::BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_of
null_count, offset));
}

LargeBinaryArray::LargeBinaryArray(const std::shared_ptr<ArrayData>& data) {
void LargeBinaryArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK(is_large_binary_like(data->type->id()));
SetData(data);
}

LargeBinaryArray::LargeBinaryArray(int64_t length,
Expand All @@ -59,9 +57,8 @@ LargeBinaryArray::LargeBinaryArray(int64_t length,
null_count, offset));
}

StringArray::StringArray(const std::shared_ptr<ArrayData>& data) {
void StringArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::STRING);
SetData(data);
}

StringArray::StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
Expand All @@ -74,9 +71,8 @@ StringArray::StringArray(int64_t length, const std::shared_ptr<Buffer>& value_of

// Forwards to internal::ValidateUTF8() on this array's ArrayData and returns
// its Status unchanged.
Status StringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }

LargeStringArray::LargeStringArray(const std::shared_ptr<ArrayData>& data) {
void LargeStringArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::LARGE_STRING);
SetData(data);
}

LargeStringArray::LargeStringArray(int64_t length,
Expand All @@ -90,9 +86,8 @@ LargeStringArray::LargeStringArray(int64_t length,

// Forwards to internal::ValidateUTF8() on this array's ArrayData and returns
// its Status unchanged.
Status LargeStringArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }

BinaryViewArray::BinaryViewArray(std::shared_ptr<ArrayData> data) {
void BinaryViewArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::BINARY_VIEW);
SetData(std::move(data));
}

BinaryViewArray::BinaryViewArray(std::shared_ptr<DataType> type, int64_t length,
Expand All @@ -110,17 +105,12 @@ std::string_view BinaryViewArray::GetView(int64_t i) const {
return util::FromBinaryView(raw_values_[i], data_buffers);
}

StringViewArray::StringViewArray(std::shared_ptr<ArrayData> data) {
void StringViewArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::STRING_VIEW);
SetData(std::move(data));
}

// Forwards to internal::ValidateUTF8() on this array's ArrayData and returns
// its Status unchanged.
Status StringViewArray::ValidateUTF8() const { return internal::ValidateUTF8(*data_); }

FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data) {
SetData(data);
}

FixedSizeBinaryArray::FixedSizeBinaryArray(const std::shared_ptr<DataType>& type,
int64_t length,
const std::shared_ptr<Buffer>& data,
Expand Down
67 changes: 53 additions & 14 deletions cpp/src/arrow/array/array_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ class BaseBinaryArray : public FlatArray {
BaseBinaryArray() = default;

// Protected method for constructors
void SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
// Installs `data` through Array::SetData(), then caches two raw pointers
// obtained from GetValuesSafe with an explicit offset of 0:
// index 1 as offset_type (raw_value_offsets_) and index 2 as uint8_t
// (raw_data_).
void SetData(const std::shared_ptr<ArrayData>& data) override {
Array::SetData(data);
raw_value_offsets_ = data->GetValuesSafe<offset_type>(1, /*offset=*/0);
raw_data_ = data->GetValuesSafe<uint8_t>(2, /*offset=*/0);
}
Expand All @@ -155,7 +155,10 @@ class BaseBinaryArray : public FlatArray {
/// Concrete Array class for variable-size binary data
class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
public:
explicit BinaryArray(const std::shared_ptr<ArrayData>& data);
/// \brief Construct from ArrayData, optionally attaching statistics
///
/// Delegates to Init(), which calls this class's ValidateData() override
/// before SetData() and records \p statistics only when it is non-null.
///
/// \param[in] data the ArrayData backing this array
/// \param[in] statistics optional statistics (defaults to NULLPTR)
explicit BinaryArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR) {
Init(data, statistics);
}

BinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
Expand All @@ -165,14 +168,19 @@ class ARROW_EXPORT BinaryArray : public BaseBinaryArray<BinaryType> {
protected:
// For subclasses such as StringArray
BinaryArray() : BaseBinaryArray() {}

void ValidateData(const std::shared_ptr<ArrayData>& data) override;
};

/// Concrete Array class for variable-size string (utf-8) data
class ARROW_EXPORT StringArray : public BinaryArray {
public:
using TypeClass = StringType;

explicit StringArray(const std::shared_ptr<ArrayData>& data);
/// \brief Construct from ArrayData, optionally attaching statistics
///
/// Delegates to Init(), which calls this class's ValidateData() override
/// before SetData() and records \p statistics only when it is non-null.
///
/// \param[in] data the ArrayData backing this array
/// \param[in] statistics optional statistics (defaults to NULLPTR)
explicit StringArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR) {
Init(data, statistics);
}

StringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
Expand All @@ -183,12 +191,19 @@ class ARROW_EXPORT StringArray : public BinaryArray {
///
/// This check is also implied by ValidateFull()
Status ValidateUTF8() const;

protected:
void ValidateData(const std::shared_ptr<ArrayData>& data) override;
};

/// Concrete Array class for large variable-size binary data
class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
public:
explicit LargeBinaryArray(const std::shared_ptr<ArrayData>& data);
/// \brief Construct from ArrayData, optionally attaching statistics
///
/// Delegates to Init(), which calls this class's ValidateData() override
/// before SetData() and records \p statistics only when it is non-null.
///
/// \param[in] data the ArrayData backing this array
/// \param[in] statistics optional statistics (defaults to NULLPTR)
explicit LargeBinaryArray(
const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR) {
Init(data, statistics);
}

LargeBinaryArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
Expand All @@ -198,14 +213,19 @@ class ARROW_EXPORT LargeBinaryArray : public BaseBinaryArray<LargeBinaryType> {
protected:
// For subclasses such as LargeStringArray
LargeBinaryArray() : BaseBinaryArray() {}
void ValidateData(const std::shared_ptr<ArrayData>& data) override;
};

/// Concrete Array class for large variable-size string (utf-8) data
class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
public:
using TypeClass = LargeStringType;

explicit LargeStringArray(const std::shared_ptr<ArrayData>& data);
/// \brief Construct from ArrayData, optionally attaching statistics
///
/// Delegates to Init(), which calls this class's ValidateData() override
/// before SetData() and records \p statistics only when it is non-null.
///
/// \param[in] data the ArrayData backing this array
/// \param[in] statistics optional statistics (defaults to NULLPTR)
explicit LargeStringArray(
const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR) {
Init(data, statistics);
}

LargeStringArray(int64_t length, const std::shared_ptr<Buffer>& value_offsets,
const std::shared_ptr<Buffer>& data,
Expand All @@ -216,6 +236,9 @@ class ARROW_EXPORT LargeStringArray : public LargeBinaryArray {
///
/// This check is also implied by ValidateFull()
Status ValidateUTF8() const;

protected:
void ValidateData(const std::shared_ptr<ArrayData>& data) override;
};

// ----------------------------------------------------------------------
Expand All @@ -229,7 +252,10 @@ class ARROW_EXPORT BinaryViewArray : public FlatArray {
using IteratorType = stl::ArrayIterator<BinaryViewArray>;
using c_type = BinaryViewType::c_type;

explicit BinaryViewArray(std::shared_ptr<ArrayData> data);
/// \brief Construct from ArrayData, optionally attaching statistics
///
/// Delegates to Init(), which calls this class's ValidateData() and SetData()
/// overrides and records \p statistics only when it is non-null.
///
/// \param[in] data the ArrayData backing this array
/// \param[in] statistics optional statistics (defaults to NULLPTR)
explicit BinaryViewArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR) {
Init(data, statistics);
}

BinaryViewArray(std::shared_ptr<DataType> type, int64_t length,
std::shared_ptr<Buffer> views, BufferVector data_buffers,
Expand All @@ -251,10 +277,13 @@ class ARROW_EXPORT BinaryViewArray : public FlatArray {
IteratorType end() const { return IteratorType(*this, length()); }

protected:
using FlatArray::FlatArray;
// This constructor defers Init() to a derived array class
BinaryViewArray() = default;

void SetData(std::shared_ptr<ArrayData> data) {
FlatArray::SetData(std::move(data));
void ValidateData(const std::shared_ptr<ArrayData>& data) override;

// Installs `data` through FlatArray::SetData(), then caches the c_type
// pointer returned by GetValuesSafe for index 1 in raw_values_.
void SetData(const std::shared_ptr<ArrayData>& data) override {
FlatArray::SetData(data);
// Reads through the data_ member rather than the parameter, so this line
// relies on the base SetData() call above having assigned data_.
raw_values_ = data_->GetValuesSafe<c_type>(1);
}

Expand All @@ -267,14 +296,20 @@ class ARROW_EXPORT StringViewArray : public BinaryViewArray {
public:
using TypeClass = StringViewType;

explicit StringViewArray(std::shared_ptr<ArrayData> data);
/// \brief Construct from ArrayData, optionally attaching statistics
///
/// Delegates to Init(), which calls this class's ValidateData() override
/// before SetData() and records \p statistics only when it is non-null.
///
/// \param[in] data the ArrayData backing this array
/// \param[in] statistics optional statistics (defaults to NULLPTR)
explicit StringViewArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR) {
Init(data, statistics);
}

using BinaryViewArray::BinaryViewArray;

/// \brief Validate that this array contains only valid UTF8 entries
///
/// This check is also implied by ValidateFull()
Status ValidateUTF8() const;

protected:
void ValidateData(const std::shared_ptr<ArrayData>& data) override;
};

// ----------------------------------------------------------------------
Expand All @@ -286,7 +321,11 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
using TypeClass = FixedSizeBinaryType;
using IteratorType = stl::ArrayIterator<FixedSizeBinaryArray>;

explicit FixedSizeBinaryArray(const std::shared_ptr<ArrayData>& data);
/// \brief Construct from ArrayData, optionally attaching statistics
///
/// Delegates to Init(); the SetData() it invokes is this class's override,
/// which also fills in byte_width_.
///
/// \param[in] data the ArrayData backing this array
/// \param[in] statistics optional statistics (defaults to NULLPTR)
explicit FixedSizeBinaryArray(
const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR) {
Init(data, statistics);
}

FixedSizeBinaryArray(const std::shared_ptr<DataType>& type, int64_t length,
const std::shared_ptr<Buffer>& data,
Expand Down Expand Up @@ -315,8 +354,8 @@ class ARROW_EXPORT FixedSizeBinaryArray : public PrimitiveArray {
IteratorType end() const { return IteratorType(*this, length()); }

protected:
void SetData(const std::shared_ptr<ArrayData>& data) {
this->PrimitiveArray::SetData(data);
// Installs `data` through PrimitiveArray::SetData(), then caches the byte
// width taken from this array's type, cast to FixedSizeBinaryType.
void SetData(const std::shared_ptr<ArrayData>& data) override {
PrimitiveArray::SetData(data);
// type() is called after the base SetData(); presumably it reads the data
// just installed -- TODO confirm against Array::type().
byte_width_ =
internal::checked_cast<const FixedSizeBinaryType&>(*type()).byte_width();
}
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/arrow/array/array_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,15 @@ int64_t DictionaryArray::GetValueIndex(int64_t i) const {
}
}

DictionaryArray::DictionaryArray(const std::shared_ptr<ArrayData>& data)
// Constructs a DictionaryArray from ArrayData, optionally attaching
// statistics. dict_type_ is taken from data->type in the initializer list;
// Init() then runs ValidateData(), SetData() and, for a non-null argument,
// SetStatistics().
// NOTE(review): the checked_cast to DictionaryType executes before
// ValidateData() checks that the type id is Type::DICTIONARY.
DictionaryArray::DictionaryArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics)
: dict_type_(checked_cast<const DictionaryType*>(data->type.get())) {
Init(data, statistics);
}

void DictionaryArray::ValidateData(const std::shared_ptr<ArrayData>& data) {
ARROW_CHECK_EQ(data->type->id(), Type::DICTIONARY);
ARROW_CHECK_NE(data->dictionary, nullptr);
SetData(data);
}

void DictionaryArray::SetData(const std::shared_ptr<ArrayData>& data) {
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/array/array_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ class ARROW_EXPORT DictionaryArray : public Array {
public:
using TypeClass = DictionaryType;

explicit DictionaryArray(const std::shared_ptr<ArrayData>& data);
explicit DictionaryArray(const std::shared_ptr<ArrayData>& data,
const std::shared_ptr<ArrayStatistics>& statistics = NULLPTR);

DictionaryArray(const std::shared_ptr<DataType>& type,
const std::shared_ptr<Array>& indices,
Expand Down Expand Up @@ -114,7 +115,8 @@ class ARROW_EXPORT DictionaryArray : public Array {
const DictionaryType* dict_type() const { return dict_type_; }

private:
void SetData(const std::shared_ptr<ArrayData>& data);
void ValidateData(const std::shared_ptr<ArrayData>& data) override;
void SetData(const std::shared_ptr<ArrayData>& data) override;
const DictionaryType* dict_type_;
std::shared_ptr<Array> indices_;

Expand Down
Loading