Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ set(ARROW_SRCS
testing/util.cc
util/basic_decimal.cc
util/bit-util.cc
util/concatenate.cc
util/compression.cc
util/cpu-info.cc
util/decimal.cc
Expand Down
37 changes: 18 additions & 19 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,18 @@ std::shared_ptr<ArrayData> ArrayData::Make(const std::shared_ptr<DataType>& type
return std::make_shared<ArrayData>(type, length, null_count, offset);
}

// Build a zero-copy slice of this ArrayData.
//
// `off` is relative to this data's current offset, and `len` is clamped so the
// slice never extends past the end of this data. The result is a copy of
// `*this` with only length, offset and null_count adjusted.
ArrayData ArrayData::Slice(int64_t off, int64_t len) const {
  DCHECK_LE(off, length);

  ArrayData sliced(*this);
  sliced.length = std::min(length - off, len);
  sliced.offset = offset + off;
  // A null count of zero stays zero for any sub-range; otherwise the count for
  // the sliced range is not known and is marked as such.
  sliced.null_count = (null_count == 0) ? 0 : kUnknownNullCount;
  return sliced;
}

int64_t ArrayData::GetNullCount() const {
if (ARROW_PREDICT_FALSE(this->null_count == kUnknownNullCount)) {
if (this->buffers[0]) {
Expand Down Expand Up @@ -125,21 +137,8 @@ bool Array::RangeEquals(const Array& other, int64_t start_idx, int64_t end_idx,
return ArrayRangeEquals(*this, other, start_idx, end_idx, other_start_idx);
}

static inline std::shared_ptr<ArrayData> SliceData(const ArrayData& data, int64_t offset,
int64_t length) {
DCHECK_LE(offset, data.length);
length = std::min(data.length - offset, length);
offset += data.offset;

auto new_data = data.Copy();
new_data->length = length;
new_data->offset = offset;
new_data->null_count = data.null_count != 0 ? kUnknownNullCount : 0;
return new_data;
}

std::shared_ptr<Array> Array::Slice(int64_t offset, int64_t length) const {
return MakeArray(SliceData(*data_, offset, length));
return MakeArray(std::make_shared<ArrayData>(data_->Slice(offset, length)));
}

std::shared_ptr<Array> Array::Slice(int64_t offset) const {
Expand Down Expand Up @@ -385,7 +384,8 @@ std::shared_ptr<Array> StructArray::field(int i) const {
if (!boxed_fields_[i]) {
std::shared_ptr<ArrayData> field_data;
if (data_->offset != 0 || data_->child_data[i]->length != data_->length) {
field_data = SliceData(*data_->child_data[i].get(), data_->offset, data_->length);
field_data = std::make_shared<ArrayData>(
data_->child_data[i]->Slice(data_->offset, data_->length));
} else {
field_data = data_->child_data[i];
}
Expand All @@ -410,7 +410,7 @@ Status StructArray::Flatten(MemoryPool* pool, ArrayVector* out) const {

// Need to adjust for parent offset
if (data_->offset != 0 || data_->length != child_data->length) {
child_data = SliceData(*child_data, data_->offset, data_->length);
*child_data = child_data->Slice(data_->offset, data_->length);
}
std::shared_ptr<Buffer> child_null_bitmap = child_data->buffers[0];
const int64_t child_offset = child_data->offset;
Expand Down Expand Up @@ -540,13 +540,13 @@ Status UnionArray::MakeSparse(const Array& type_ids,

std::shared_ptr<Array> UnionArray::child(int i) const {
if (!boxed_fields_[i]) {
std::shared_ptr<ArrayData> child_data = data_->child_data[i];
std::shared_ptr<ArrayData> child_data = data_->child_data[i]->Copy();
if (mode() == UnionMode::SPARSE) {
// Sparse union: need to adjust child if union is sliced
// (for dense unions, the need to lookup through the offsets
// makes this unnecessary)
if (data_->offset != 0 || child_data->length > data_->length) {
child_data = SliceData(*child_data.get(), data_->offset, data_->length);
*child_data = child_data->Slice(data_->offset, data_->length);
}
}
boxed_fields_[i] = MakeArray(child_data);
Expand Down Expand Up @@ -994,5 +994,4 @@ std::vector<ArrayVector> RechunkArraysConsistently(
}

} // namespace internal

} // namespace arrow
23 changes: 10 additions & 13 deletions cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,6 @@ namespace arrow {
class Array;
class ArrayVisitor;

using BufferVector = std::vector<std::shared_ptr<Buffer>>;

// When slicing, we do not know the null count of the sliced range without
// doing some computation. To avoid doing this eagerly, we set the null count
// to -1 (any negative number will do). When Array::null_count is called the
Expand All @@ -67,15 +65,15 @@ class Status;
/// could cast from int64 to float64 like so:
///
/// Int64Array arr = GetMyData();
/// auto new_data = arr.data()->ShallowCopy();
/// auto new_data = arr.data()->Copy();
/// new_data->type = arrow::float64();
/// DoubleArray double_arr(new_data);
///
/// This object is also useful in an analytics setting where memory may be
/// reused. For example, if we had a group of operations all returning doubles,
/// say:
///
/// Log(Sqrt(Expr(arr))
/// Log(Sqrt(Expr(arr)))
///
/// Then the low-level implementations of each of these functions could have
/// the signatures
Expand Down Expand Up @@ -146,6 +144,7 @@ struct ARROW_EXPORT ArrayData {
buffers(std::move(other.buffers)),
child_data(std::move(other.child_data)) {}

// Copy constructor
ArrayData(const ArrayData& other) noexcept
: type(other.type),
length(other.length),
Expand All @@ -155,15 +154,10 @@ struct ARROW_EXPORT ArrayData {
child_data(other.child_data) {}

// Move assignment
ArrayData& operator=(ArrayData&& other) {
type = std::move(other.type);
length = other.length;
null_count = other.null_count;
offset = other.offset;
buffers = std::move(other.buffers);
child_data = std::move(other.child_data);
return *this;
}
ArrayData& operator=(ArrayData&& other) = default;

// Copy assignment
ArrayData& operator=(const ArrayData& other) = default;

// Return a new ArrayData, held by shared_ptr, copy-constructed from this one
std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); }

Expand Down Expand Up @@ -197,6 +191,9 @@ struct ARROW_EXPORT ArrayData {
return GetMutableValues<T>(i, offset);
}

// Construct a zero-copy slice of the data with the indicated offset and length
ArrayData Slice(int64_t offset, int64_t length) const;

/// \brief Return null count, or compute and set it if it's not known
int64_t GetNullCount() const;

Expand Down
15 changes: 15 additions & 0 deletions cpp/src/arrow/buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -227,4 +227,19 @@ Status AllocateEmptyBitmap(int64_t length, std::shared_ptr<Buffer>* out) {
return AllocateEmptyBitmap(default_memory_pool(), length, out);
}

// Concatenate the bytes of `buffers`, in order, into one newly allocated buffer.
//
// \param[in] buffers buffers whose contents are copied back to back
// \param[in] pool memory pool handed to AllocateBuffer for the output
// \param[out] out receives the concatenated buffer
// \return the non-OK Status from AllocateBuffer (via RETURN_NOT_OK), otherwise
// Status::OK()
Status ConcatenateBuffers(const std::vector<std::shared_ptr<Buffer>>& buffers,
                          MemoryPool* pool, std::shared_ptr<Buffer>* out) {
  // First pass: total size of the output.
  int64_t out_length = 0;
  for (const auto& buffer : buffers) {
    out_length += buffer->size();
  }
  RETURN_NOT_OK(AllocateBuffer(pool, out_length, out));

  // Second pass: copy each input behind the previous one.
  auto out_data = (*out)->mutable_data();
  for (const auto& buffer : buffers) {
    const auto nbytes = buffer->size();
    // Passing a null pointer to std::memcpy is undefined behavior even when
    // the byte count is zero, and an empty buffer (or an empty output) may
    // expose a null data pointer, so skip empty inputs entirely.
    if (nbytes == 0) {
      continue;
    }
    std::memcpy(out_data, buffer->data(), nbytes);
    out_data += nbytes;
  }
  return Status::OK();
}

} // namespace arrow
13 changes: 13 additions & 0 deletions cpp/src/arrow/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ class ARROW_EXPORT Buffer {
ARROW_DISALLOW_COPY_AND_ASSIGN(Buffer);
};

using BufferVector = std::vector<std::shared_ptr<Buffer>>;

/// \defgroup buffer-slicing-functions Functions for slicing buffers
///
/// @{
Expand Down Expand Up @@ -402,6 +404,17 @@ Status AllocateEmptyBitmap(MemoryPool* pool, int64_t length,
ARROW_EXPORT
Status AllocateEmptyBitmap(int64_t length, std::shared_ptr<Buffer>* out);

/// \brief Concatenate multiple buffers into a single buffer
///
/// \param[in] buffers to be concatenated
/// \param[in] pool memory pool to allocate the new buffer from
/// \param[out] out the concatenated buffer
///
/// \return Status
ARROW_EXPORT
Status ConcatenateBuffers(const BufferVector& buffers, MemoryPool* pool,
std::shared_ptr<Buffer>* out);

/// @}

} // namespace arrow
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/util/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ arrow_install_all_headers("arrow/util")
add_arrow_test(bit-util-test)
add_arrow_test(checked-cast-test)
add_arrow_test(compression-test)
add_arrow_test(concatenate-test)
add_arrow_test(decimal-test)
add_arrow_test(hashing-test)
add_arrow_test(int-util-test)
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/arrow/util/bit-util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -210,8 +210,12 @@ Status TransferBitmap(MemoryPool* pool, const uint8_t* data, int64_t offset,
}

void CopyBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest,
int64_t dest_offset) {
TransferBitmap<false, true>(data, offset, length, dest_offset, dest);
int64_t dest_offset, bool restore_trailing_bits) {
if (restore_trailing_bits) {
TransferBitmap<false, true>(data, offset, length, dest_offset, dest);
} else {
TransferBitmap<false, false>(data, offset, length, dest_offset, dest);
}
}

void InvertBitmap(const uint8_t* data, int64_t offset, int64_t length, uint8_t* dest,
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/util/bit-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -708,11 +708,12 @@ Status CopyBitmap(MemoryPool* pool, const uint8_t* bitmap, int64_t offset, int64
/// \param[in] offset bit offset into the source data
/// \param[in] length number of bits to copy
/// \param[in] dest_offset bit offset into the destination
/// \param[in] restore_trailing_bits don't clobber bits outside the destination range
/// \param[out] dest the destination buffer, must have at least space for
/// (dest_offset + length) bits
ARROW_EXPORT
void CopyBitmap(const uint8_t* bitmap, int64_t offset, int64_t length, uint8_t* dest,
int64_t dest_offset);
int64_t dest_offset, bool restore_trailing_bits = true);

/// Invert a bit range of an existing bitmap into an existing bitmap
///
Expand Down
Loading