Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 3 additions & 17 deletions cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,20 +39,6 @@ namespace compute::internal {

namespace {

Result<std::shared_ptr<Buffer>> GetNullBitmapBuffer(const ArraySpan& in_array,
MemoryPool* pool) {
if (in_array.buffers[0].data == nullptr) {
return nullptr;
}

if (in_array.offset == 0) {
return in_array.GetBuffer(0);
}

// If a non-zero offset, we need to shift the bitmap
return CopyBitmap(pool, in_array.buffers[0].data, in_array.offset, in_array.length);
}

// (Large)List<T> -> (Large)List<U>

// TODO(wesm): memory could be preallocated here and it would make
Expand Down Expand Up @@ -127,7 +113,7 @@ struct CastList {

ArrayData* out_array = out->array_data().get();
ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
GetNullBitmapBuffer(in_array, ctx->memory_pool()));
GetOrCopyNullBitmapBuffer(in_array, ctx->memory_pool()));
out_array->buffers[1] = in_array.GetBuffer(1);

std::shared_ptr<ArrayData> values = in_array.child_data[0].ToArrayData();
Expand Down Expand Up @@ -167,7 +153,7 @@ struct CastFixedToVarList {

ArrayData* out_array = out->array_data().get();
ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
GetNullBitmapBuffer(in_array, ctx->memory_pool()));
GetOrCopyNullBitmapBuffer(in_array, ctx->memory_pool()));

const auto& in_type = checked_cast<const FixedSizeListType&>(*in_array.type);
const int32_t list_size = in_type.list_size();
Expand Down Expand Up @@ -278,7 +264,7 @@ struct CastVarToFixedList {

ArrayData* out_array = out->array_data().get();
ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
GetNullBitmapBuffer(in_array, ctx->memory_pool()));
GetOrCopyNullBitmapBuffer(in_array, ctx->memory_pool()));

// Handle values
std::shared_ptr<ArrayData> values = in_array.child_data[0].ToArrayData();
Expand Down
49 changes: 28 additions & 21 deletions cpp/src/arrow/compute/kernels/scalar_cast_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,9 @@ BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou
template <typename O, typename I>
enable_if_t<is_binary_view_like_type<I>::value && is_base_binary_type<O>::value, Status>
BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) {
using OutputBuilderType = typename TypeTraits<O>::BuilderType;
using offset_type = typename O::offset_type;
using DataBuilder = TypedBufferBuilder<uint8_t>;
using OffsetBuilder = TypedBufferBuilder<offset_type>;
const CastOptions& options = checked_cast<const CastState&>(*ctx->state()).options;
const ArraySpan& input = batch[0].array;

Expand All @@ -327,31 +329,36 @@ BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou
}
}

ArrayData* output = out->array_data().get();
output->length = input.length;
output->SetNullCount(input.null_count);

// Set up validity bitmap
ARROW_ASSIGN_OR_RAISE(output->buffers[0],
GetOrCopyNullBitmapBuffer(input, ctx->memory_pool()));

// Set up offset and data buffer
OffsetBuilder offset_builder(ctx->memory_pool());
RETURN_NOT_OK(offset_builder.Reserve(input.length + 1));
offset_builder.UnsafeAppend(0); // offsets start at 0
const int64_t sum_of_binary_view_sizes = util::SumOfBinaryViewSizes(
input.GetValues<BinaryViewType::c_type>(1), input.length);

// TODO(GH-43573): A more efficient implementation that copies the validity
// bitmap all at once is possible, but would mean we don't delegate all the
// building logic to the ArrayBuilder implementation for the output type.
OutputBuilderType builder(options.to_type.GetSharedPtr(), ctx->memory_pool());
RETURN_NOT_OK(builder.Resize(input.length));
RETURN_NOT_OK(builder.ReserveData(sum_of_binary_view_sizes));
arrow::internal::ArraySpanInlineVisitor<I> visitor;
RETURN_NOT_OK(visitor.VisitStatus(
DataBuilder data_builder(ctx->memory_pool());
RETURN_NOT_OK(data_builder.Reserve(sum_of_binary_view_sizes));
VisitArraySpanInline<I>(
input,
[&](std::string_view v) {
// Append valid string view
return builder.Append(v);
[&](std::string_view s) {
// for non-null value, append string view to buffer and calculate offset
data_builder.UnsafeAppend(reinterpret_cast<const uint8_t*>(s.data()),
static_cast<int64_t>(s.size()));
offset_builder.UnsafeAppend(static_cast<offset_type>(data_builder.length()));
},
[&]() {
// Append null
builder.UnsafeAppendNull();
return Status::OK();
}));

std::shared_ptr<ArrayData> output_array;
RETURN_NOT_OK(builder.FinishInternal(&output_array));
out->value = std::move(output_array);
// for null value, no need to update data buffer
offset_builder.UnsafeAppend(static_cast<offset_type>(data_builder.length()));
});
RETURN_NOT_OK(offset_builder.Finish(&output->buffers[1]));
RETURN_NOT_OK(data_builder.Finish(&output->buffers[2]));
return Status::OK();
}

Expand Down
19 changes: 19 additions & 0 deletions cpp/src/arrow/compute/kernels/util_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,25 @@ ArrayKernelExec GenerateArithmeticFloatingPoint(detail::GetTypeId get_id) {
// A scalar kernel that ignores (assumed all-null) inputs and returns null.
void AddNullExec(ScalarFunction* func);

inline Result<std::shared_ptr<Buffer>> GetOrCopyNullBitmapBuffer(
const ArraySpan& in_array, MemoryPool* pool) {
if (in_array.buffers[0].data == nullptr) {
return nullptr;
}

if (in_array.offset == 0) {
return in_array.GetBuffer(0);
}

if (in_array.offset % 8 == 0) {
return SliceBuffer(in_array.GetBuffer(0), /*offset=*/in_array.offset / 8);
}

// If a non-zero offset, we need to shift the bitmap
return arrow::internal::CopyBitmap(pool, in_array.buffers[0].data, in_array.offset,
in_array.length);
}

} // namespace internal
} // namespace compute
} // namespace arrow
Loading