Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ static inline std::shared_ptr<ArrayData> SliceData(const ArrayData& data, int64_
length = std::min(data.length - offset, length);
offset += data.offset;

auto new_data = data.ShallowCopy();
auto new_data = data.Copy();
new_data->length = length;
new_data->offset = offset;
new_data->null_count = data.null_count != 0 ? kUnknownNullCount : 0;
Expand Down Expand Up @@ -482,14 +482,14 @@ DictionaryArray::DictionaryArray(const std::shared_ptr<DataType>& type,
: dict_type_(static_cast<const DictionaryType*>(type.get())) {
DCHECK_EQ(type->id(), Type::DICTIONARY);
DCHECK_EQ(indices->type_id(), dict_type_->index_type()->id());
auto data = indices->data()->ShallowCopy();
auto data = indices->data()->Copy();
data->type = type;
SetData(data);
}

void DictionaryArray::SetData(const std::shared_ptr<ArrayData>& data) {
this->Array::SetData(data);
auto indices_data = data_->ShallowCopy();
auto indices_data = data_->Copy();
indices_data->type = dict_type_->index_type();
std::shared_ptr<Array> result;
indices_ = MakeArray(indices_data);
Expand Down
11 changes: 8 additions & 3 deletions cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,14 @@ struct ARROW_EXPORT ArrayData {
return *this;
}

std::shared_ptr<ArrayData> ShallowCopy() const {
return std::make_shared<ArrayData>(*this);
}
std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); }

#ifndef ARROW_NO_DEPRECATED_API

// Deprecated since 0.8.0
std::shared_ptr<ArrayData> ShallowCopy() const { return Copy(); }

#endif

std::shared_ptr<DataType> type;
int64_t length;
Expand Down
60 changes: 60 additions & 0 deletions cpp/src/arrow/compute/compute-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,66 @@ TEST_F(TestCast, PreallocatedMemory) {
ASSERT_ARRAYS_EQUAL(*expected, *result);
}

template <typename InType, typename InT, typename OutType, typename OutT>
void CheckOffsetOutputCase(FunctionContext* ctx, const std::shared_ptr<DataType>& in_type,
const vector<InT>& in_values,
const std::shared_ptr<DataType>& out_type,
const vector<OutT>& out_values) {
using OutTraits = TypeTraits<OutType>;

CastOptions options;

const int64_t length = static_cast<int64_t>(in_values.size());

shared_ptr<Array> arr, expected;
ArrayFromVector<InType, InT>(in_type, in_values, &arr);
ArrayFromVector<OutType, OutT>(out_type, out_values, &expected);

shared_ptr<Buffer> out_buffer;
ASSERT_OK(ctx->Allocate(OutTraits::bytes_required(length), &out_buffer));

std::unique_ptr<UnaryKernel> kernel;
ASSERT_OK(GetCastFunction(*in_type, out_type, options, &kernel));

const int64_t first_half = length / 2;

auto out_data = ArrayData::Make(out_type, length, {nullptr, out_buffer});
auto out_second_data = out_data->Copy();
out_second_data->offset = first_half;

Datum out_first(out_data);
Datum out_second(out_second_data);

// Cast each bit
ASSERT_OK(kernel->Call(ctx, Datum(arr->Slice(0, first_half)), &out_first));
ASSERT_OK(kernel->Call(ctx, Datum(arr->Slice(first_half)), &out_second));

shared_ptr<Array> result = MakeArray(out_data);

ASSERT_ARRAYS_EQUAL(*expected, *result);
}

TEST_F(TestCast, OffsetOutputBuffer) {
// ARROW-1735
vector<int32_t> v1 = {0, 10000, 2000, 1000, 0};
vector<int64_t> e1 = {0, 10000, 2000, 1000, 0};

auto in_type = int32();
auto out_type = int64();
CheckOffsetOutputCase<Int32Type, int32_t, Int64Type, int64_t>(&this->ctx_, in_type, v1,
out_type, e1);

vector<bool> e2 = {false, true, true, true, false};

out_type = boolean();
CheckOffsetOutputCase<Int32Type, int32_t, BooleanType, bool>(&this->ctx_, in_type, v1,
boolean(), e2);

vector<int16_t> e3 = {0, 10000, 2000, 1000, 0};
CheckOffsetOutputCase<Int32Type, int32_t, Int16Type, int16_t>(&this->ctx_, in_type, v1,
int16(), e3);
}

template <typename TestType>
class TestDictionaryCast : public TestCast {};

Expand Down
38 changes: 18 additions & 20 deletions cpp/src/arrow/compute/kernels/cast.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,7 @@ template <typename T>
struct CastFunctor<T, NullType, typename std::enable_if<
std::is_base_of<FixedWidthType, T>::value>::type> {
void operator()(FunctionContext* ctx, const CastOptions& options,
const ArrayData& input, ArrayData* output) {
// Simply initialize data to 0
auto buf = output->buffers[1];
DCHECK_EQ(output->offset, 0);
memset(buf->mutable_data(), 0, buf->size());
}
const ArrayData& input, ArrayData* output) {}
};

template <>
Expand Down Expand Up @@ -199,14 +194,19 @@ struct CastFunctor<O, I, typename std::enable_if<std::is_same<BooleanType, O>::v
!std::is_same<O, I>::value>::type> {
void operator()(FunctionContext* ctx, const CastOptions& options,
const ArrayData& input, ArrayData* output) {
using in_type = typename I::c_type;
DCHECK_EQ(output->offset, 0);
auto in_data = GetValues<typename I::c_type>(input, 1);
internal::BitmapWriter writer(output->buffers[1]->mutable_data(), output->offset,
input.length);

const in_type* in_data = GetValues<in_type>(input, 1);
uint8_t* out_data = GetMutableValues<uint8_t>(output, 1);
for (int64_t i = 0; i < input.length; ++i) {
BitUtil::SetBitTo(out_data, i, (*in_data++) != 0);
if (*in_data++ != 0) {
writer.Set();
} else {
writer.Clear();
}
writer.Next();
}
writer.Finish();
}
};

Expand All @@ -217,7 +217,6 @@ struct CastFunctor<O, I,
const ArrayData& input, ArrayData* output) {
using in_type = typename I::c_type;
using out_type = typename O::c_type;
DCHECK_EQ(output->offset, 0);

auto in_offset = input.offset;

Expand Down Expand Up @@ -475,9 +474,10 @@ void UnpackFixedSizeBinaryDictionary(FunctionContext* ctx, const Array& indices,

const index_c_type* in = GetValues<index_c_type>(*indices.data(), 1);

uint8_t* out = output->buffers[1]->mutable_data();
int32_t byte_width =
static_cast<const FixedSizeBinaryType&>(*output->type).byte_width();

uint8_t* out = output->buffers[1]->mutable_data() + byte_width * output->offset;
for (int64_t i = 0; i < indices.length(); ++i) {
if (valid_bits_reader.IsSet()) {
const uint8_t* value = dictionary.Value(in[i]);
Expand All @@ -493,7 +493,7 @@ struct CastFunctor<
typename std::enable_if<std::is_base_of<FixedSizeBinaryType, T>::value>::type> {
void operator()(FunctionContext* ctx, const CastOptions& options,
const ArrayData& input, ArrayData* output) {
DictionaryArray dict_array(input.ShallowCopy());
DictionaryArray dict_array(input.Copy());

const DictionaryType& type = static_cast<const DictionaryType&>(*input.type);
const DataType& values_type = *type.dictionary()->type();
Expand Down Expand Up @@ -565,7 +565,7 @@ struct CastFunctor<T, DictionaryType,
typename std::enable_if<std::is_base_of<BinaryType, T>::value>::type> {
void operator()(FunctionContext* ctx, const CastOptions& options,
const ArrayData& input, ArrayData* output) {
DictionaryArray dict_array(input.ShallowCopy());
DictionaryArray dict_array(input.Copy());

const DictionaryType& type = static_cast<const DictionaryType&>(*input.type);
const DataType& values_type = *type.dictionary()->type();
Expand Down Expand Up @@ -605,12 +605,10 @@ struct CastFunctor<T, DictionaryType,
template <typename IndexType, typename c_type>
void UnpackPrimitiveDictionary(const Array& indices, const c_type* dictionary,
c_type* out) {
using index_c_type = typename IndexType::c_type;

internal::BitmapReader valid_bits_reader(indices.null_bitmap_data(), indices.offset(),
indices.length());

const index_c_type* in = GetValues<index_c_type>(*indices.data(), 1);
auto in = GetValues<typename IndexType::c_type>(*indices.data(), 1);
for (int64_t i = 0; i < indices.length(); ++i) {
if (valid_bits_reader.IsSet()) {
out[i] = dictionary[in[i]];
Expand All @@ -627,7 +625,7 @@ struct CastFunctor<T, DictionaryType,
const ArrayData& input, ArrayData* output) {
using c_type = typename T::c_type;

DictionaryArray dict_array(input.ShallowCopy());
DictionaryArray dict_array(input.Copy());

const DictionaryType& type = static_cast<const DictionaryType&>(*input.type);
const DataType& values_type = *type.dictionary()->type();
Expand All @@ -638,7 +636,7 @@ struct CastFunctor<T, DictionaryType,

const c_type* dictionary = GetValues<c_type>(*type.dictionary()->data(), 1);

auto out = reinterpret_cast<c_type*>(output->buffers[1]->mutable_data());
auto out = GetMutableValues<c_type>(output, 1);
const Array& indices = *dict_array.indices();
switch (indices.type()->id()) {
case Type::INT8:
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/python/arrow_to_python.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ Status ReadSerializedObject(io::RandomAccessFile* src, SerializedPyObject* out);
/// \brief Reconstruct SerializedPyObject from representation produced by
/// SerializedPyObject::GetComponents.
///
/// \param[in] num_tensors
/// \param[in] num_buffers
/// \param[in] num_tensors number of tensors in the object
/// \param[in] num_buffers number of buffers in the object
/// \param[in] data a list containing pyarrow.Buffer instances. Must be 1 +
/// num_tensors * 2 + num_buffers in length
/// \param[out] out the reconstructed object
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/python/numpy_to_arrow.cc
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,7 @@ Status NumPyConverter::ConvertObjectStrings() {
// If we saw PyBytes, convert everything to BinaryArray
if (global_have_bytes) {
for (size_t i = 0; i < out_arrays_.size(); ++i) {
auto binary_data = out_arrays_[i]->data()->ShallowCopy();
auto binary_data = out_arrays_[i]->data()->Copy();
binary_data->type = ::arrow::binary();
out_arrays_[i] = std::make_shared<BinaryArray>(binary_data);
}
Expand Down